Diffstat (limited to 'src/panfrost/util')
-rw-r--r--  src/panfrost/util/lcra.c                        | 287
-rw-r--r--  src/panfrost/util/lcra.h                        | 119
-rw-r--r--  src/panfrost/util/meson.build                   |  10
-rw-r--r--  src/panfrost/util/nir_mod_helpers.c             | 128
-rw-r--r--  src/panfrost/util/pan_collect_varyings.c        | 190
-rw-r--r--  src/panfrost/util/pan_ir.c                      | 180
-rw-r--r--  src/panfrost/util/pan_ir.h                      | 564
-rw-r--r--  src/panfrost/util/pan_liveness.c                | 144
-rw-r--r--  src/panfrost/util/pan_lower_64bit_intrin.c      |  78
-rw-r--r--  src/panfrost/util/pan_lower_framebuffer.c       | 862
-rw-r--r--  src/panfrost/util/pan_lower_framebuffer.h       |  16
-rw-r--r--  src/panfrost/util/pan_lower_helper_invocation.c |  30
-rw-r--r--  src/panfrost/util/pan_lower_image_index.c       |  55
-rw-r--r--  src/panfrost/util/pan_lower_image_ms.c          |  72
-rw-r--r--  src/panfrost/util/pan_lower_sample_position.c   |  51
-rw-r--r--  src/panfrost/util/pan_lower_store_component.c   |  98
-rw-r--r--  src/panfrost/util/pan_lower_writeout.c          | 306
-rw-r--r--  src/panfrost/util/pan_lower_xfb.c               | 105
-rw-r--r--  src/panfrost/util/pan_sysval.c                  | 162
19 files changed, 1835 insertions, 1622 deletions
diff --git a/src/panfrost/util/lcra.c b/src/panfrost/util/lcra.c
index 45cff37e61f..00585c646a6 100644
--- a/src/panfrost/util/lcra.c
+++ b/src/panfrost/util/lcra.c
@@ -24,14 +24,14 @@
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/
-#include <stdio.h>
+#include "lcra.h"
#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <limits.h>
#include "util/macros.h"
#include "util/u_math.h"
-#include "lcra.h"
/* This module is the reference implementation of "Linearly Constrained
* Register Allocation". The paper is available in PDF form
@@ -40,161 +40,168 @@
*/
struct lcra_state *
-lcra_alloc_equations(
- unsigned node_count, unsigned class_count)
+lcra_alloc_equations(unsigned node_count, unsigned class_count)
{
- struct lcra_state *l = calloc(1, sizeof(*l));
+ struct lcra_state *l = calloc(1, sizeof(*l));
- l->node_count = node_count;
- l->class_count = class_count;
+ l->node_count = node_count;
+ l->class_count = class_count;
- l->alignment = calloc(sizeof(l->alignment[0]), node_count);
- l->linear = calloc(sizeof(l->linear[0]), node_count * node_count);
- l->modulus = calloc(sizeof(l->modulus[0]), node_count);
- l->class = calloc(sizeof(l->class[0]), node_count);
- l->class_start = calloc(sizeof(l->class_start[0]), class_count);
- l->class_disjoint = calloc(sizeof(l->class_disjoint[0]), class_count * class_count);
- l->class_size = calloc(sizeof(l->class_size[0]), class_count);
- l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count);
- l->solutions = calloc(sizeof(l->solutions[0]), node_count);
+ l->alignment = calloc(sizeof(l->alignment[0]), node_count);
+ l->linear = calloc(sizeof(l->linear[0]), node_count * node_count);
+ l->modulus = calloc(sizeof(l->modulus[0]), node_count);
+ l->class = calloc(sizeof(l->class[0]), node_count);
+ l->class_start = calloc(sizeof(l->class_start[0]), class_count);
+ l->class_disjoint =
+ calloc(sizeof(l->class_disjoint[0]), class_count * class_count);
+ l->class_size = calloc(sizeof(l->class_size[0]), class_count);
+ l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count);
+ l->solutions = calloc(sizeof(l->solutions[0]), node_count);
- memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count);
+ memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count);
- return l;
+ return l;
}
void
lcra_free(struct lcra_state *l)
{
- if (!l)
- return;
-
- free(l->alignment);
- free(l->linear);
- free(l->modulus);
- free(l->class);
- free(l->class_start);
- free(l->class_disjoint);
- free(l->class_size);
- free(l->spill_cost);
- free(l->solutions);
-
- free(l);
+ if (!l)
+ return;
+
+ free(l->alignment);
+ free(l->linear);
+ free(l->modulus);
+ free(l->class);
+ free(l->class_start);
+ free(l->class_disjoint);
+ free(l->class_size);
+ free(l->spill_cost);
+ free(l->solutions);
+
+ free(l);
}
void
-lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2, unsigned bound)
+lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2,
+ unsigned bound)
{
- l->alignment[node] = (align_log2 + 1) | (bound << 16);
+ l->alignment[node] = (align_log2 + 1) | (bound << 16);
}
void
lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2)
{
- l->class_disjoint[(c1 * l->class_count) + c2] = true;
- l->class_disjoint[(c2 * l->class_count) + c1] = true;
+ l->class_disjoint[(c1 * l->class_count) + c2] = true;
+ l->class_disjoint[(c2 * l->class_count) + c1] = true;
}
void
lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len)
{
- if (node < l->node_count && l->alignment[node]) {
- unsigned BA = l->alignment[node];
- unsigned alignment = (BA & 0xffff) - 1;
- unsigned bound = BA >> 16;
- l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment);
- }
+ if (node < l->node_count && l->alignment[node]) {
+ unsigned BA = l->alignment[node];
+ unsigned alignment = (BA & 0xffff) - 1;
+ unsigned bound = BA >> 16;
+ l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment);
+ }
}
void
-lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, unsigned j, unsigned cmask_j)
+lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i,
+ unsigned j, unsigned cmask_j)
{
- if (i == j)
- return;
+ if (i == j)
+ return;
- if (l->class_disjoint[(l->class[i] * l->class_count) + l->class[j]])
- return;
+ if (l->class_disjoint[(l->class[i] * l->class_count) + l->class[j]])
+ return;
- uint32_t constraint_fw = 0;
- uint32_t constraint_bw = 0;
+ uint32_t constraint_fw = 0;
+ uint32_t constraint_bw = 0;
- for (unsigned D = 0; D < 16; ++D) {
- if (cmask_i & (cmask_j << D)) {
- constraint_bw |= (1 << (15 + D));
- constraint_fw |= (1 << (15 - D));
- }
+ for (unsigned D = 0; D < 16; ++D) {
+ if (cmask_i & (cmask_j << D)) {
+ constraint_bw |= (1 << (15 + D));
+ constraint_fw |= (1 << (15 - D));
+ }
- if (cmask_i & (cmask_j >> D)) {
- constraint_fw |= (1 << (15 + D));
- constraint_bw |= (1 << (15 - D));
- }
- }
+ if (cmask_i & (cmask_j >> D)) {
+ constraint_fw |= (1 << (15 + D));
+ constraint_bw |= (1 << (15 - D));
+ }
+ }
- l->linear[j * l->node_count + i] |= constraint_fw;
- l->linear[i * l->node_count + j] |= constraint_bw;
+ l->linear[j * l->node_count + i] |= constraint_fw;
+ l->linear[i * l->node_count + j] |= constraint_bw;
}
static bool
lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i)
{
- unsigned *row = &l->linear[i * l->node_count];
- signed constant = solutions[i];
+ unsigned *row = &l->linear[i * l->node_count];
+ signed constant = solutions[i];
- for (unsigned j = 0; j < l->node_count; ++j) {
- if (solutions[j] == ~0) continue;
+ for (unsigned j = 0; j < l->node_count; ++j) {
+ if (solutions[j] == ~0)
+ continue;
- signed lhs = solutions[j] - constant;
+ signed lhs = solutions[j] - constant;
- if (lhs < -15 || lhs > 15)
- continue;
+ if (lhs < -15 || lhs > 15)
+ continue;
- if (row[j] & (1 << (lhs + 15)))
- return false;
- }
+ if (row[j] & (1 << (lhs + 15)))
+ return false;
+ }
- return true;
+ return true;
}
bool
lcra_solve(struct lcra_state *l)
{
- for (unsigned step = 0; step < l->node_count; ++step) {
- if (l->solutions[step] != ~0) continue;
- if (l->alignment[step] == 0) continue;
-
- unsigned _class = l->class[step];
- unsigned class_start = l->class_start[_class];
-
- unsigned BA = l->alignment[step];
- unsigned shift = (BA & 0xffff) - 1;
- unsigned bound = BA >> 16;
-
- unsigned P = bound >> shift;
- unsigned Q = l->modulus[step];
- unsigned r_max = l->class_size[_class];
- unsigned k_max = r_max >> shift;
- unsigned m_max = k_max / P;
- bool succ = false;
-
- for (unsigned m = 0; m < m_max; ++m) {
- for (unsigned n = 0; n < Q; ++n) {
- l->solutions[step] = ((m * P + n) << shift) + class_start;
- succ = lcra_test_linear(l, l->solutions, step);
-
- if (succ) break;
- }
-
- if (succ) break;
- }
-
- /* Out of registers - prepare to spill */
- if (!succ) {
- l->spill_class = l->class[step];
- return false;
- }
- }
-
- return true;
+ for (unsigned step = 0; step < l->node_count; ++step) {
+ if (l->solutions[step] != ~0)
+ continue;
+ if (l->alignment[step] == 0)
+ continue;
+
+ unsigned _class = l->class[step];
+ unsigned class_start = l->class_start[_class];
+
+ unsigned BA = l->alignment[step];
+ unsigned shift = (BA & 0xffff) - 1;
+ unsigned bound = BA >> 16;
+
+ unsigned P = bound >> shift;
+ unsigned Q = l->modulus[step];
+ unsigned r_max = l->class_size[_class];
+ unsigned k_max = r_max >> shift;
+ unsigned m_max = k_max / P;
+ bool succ = false;
+
+ for (unsigned m = 0; m < m_max; ++m) {
+ for (unsigned n = 0; n < Q; ++n) {
+ l->solutions[step] = ((m * P + n) << shift) + class_start;
+ succ = lcra_test_linear(l, l->solutions, step);
+
+ if (succ)
+ break;
+ }
+
+ if (succ)
+ break;
+ }
+
+ /* Out of registers - prepare to spill */
+ if (!succ) {
+ l->spill_class = l->class[step];
+ return false;
+ }
+ }
+
+ return true;
}
/* Register spilling is implemented with a cost-benefit system. Costs are set
@@ -203,45 +210,47 @@ lcra_solve(struct lcra_state *l)
void
lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost)
{
- if (node < l->node_count)
- l->spill_cost[node] = cost;
+ if (node < l->node_count)
+ l->spill_cost[node] = cost;
}
static unsigned
lcra_count_constraints(struct lcra_state *l, unsigned i)
{
- unsigned count = 0;
- unsigned *constraints = &l->linear[i * l->node_count];
+ unsigned count = 0;
+ unsigned *constraints = &l->linear[i * l->node_count];
- for (unsigned j = 0; j < l->node_count; ++j)
- count += util_bitcount(constraints[j]);
+ for (unsigned j = 0; j < l->node_count; ++j)
+ count += util_bitcount(constraints[j]);
- return count;
+ return count;
}
signed
lcra_get_best_spill_node(struct lcra_state *l)
{
- /* If there are no constraints on a node, do not pick it to spill under
- * any circumstance, or else we would hang rather than fail RA */
- float best_benefit = 0.0;
- signed best_node = -1;
-
- for (unsigned i = 0; i < l->node_count; ++i) {
- /* Find spillable nodes */
- if (l->class[i] != l->spill_class) continue;
- if (l->spill_cost[i] < 0) continue;
-
- /* Adapted from Chaitin's heuristic */
- float constraints = lcra_count_constraints(l, i);
- float cost = (l->spill_cost[i] + 1);
- float benefit = constraints / cost;
-
- if (benefit > best_benefit) {
- best_benefit = benefit;
- best_node = i;
- }
- }
-
- return best_node;
+ /* If there are no constraints on a node, do not pick it to spill under
+ * any circumstance, or else we would hang rather than fail RA */
+ float best_benefit = 0.0;
+ signed best_node = -1;
+
+ for (unsigned i = 0; i < l->node_count; ++i) {
+ /* Find spillable nodes */
+ if (l->class[i] != l->spill_class)
+ continue;
+ if (l->spill_cost[i] < 0)
+ continue;
+
+ /* Adapted from Chaitin's heuristic */
+ float constraints = lcra_count_constraints(l, i);
+ float cost = (l->spill_cost[i] + 1);
+ float benefit = constraints / cost;
+
+ if (benefit > best_benefit) {
+ best_benefit = benefit;
+ best_node = i;
+ }
+ }
+
+ return best_node;
}
diff --git a/src/panfrost/util/lcra.h b/src/panfrost/util/lcra.h
index fd47fdc3543..0b1ed13400f 100644
--- a/src/panfrost/util/lcra.h
+++ b/src/panfrost/util/lcra.h
@@ -31,78 +31,71 @@
#include <stdint.h>
struct lcra_state {
- unsigned node_count;
-
- /* Alignment for node in log2(bytes)+1. Since alignment must be
- * non-negative power-of-two, the elements are strictly positive
- * integers. Zero is the sentinel for a missing node. In upper word,
- * bound. */
- unsigned *alignment;
-
- /* Linear constraints imposed. Nested array sized upfront, organized as
- * linear[node_left][node_right]. That is, calculate indices as:
- *
- * Each element is itself a bit field denoting whether (c_j - c_i) bias
- * is present or not, including negative biases.
- *
- * Note for Midgard, there are 16 components so the bias is in range
- * [-15, 15] so encoded by 32-bit field. */
-
- uint32_t *linear;
-
- /* Per node max modulus constraints */
- uint8_t *modulus;
-
- /* Classes allow nodes to be partitioned with a starting register.
- * Classes cannot interfere; that is, they are true partitions in the
- * usual sense of the word. class_count is the number of classes.
- * class[] is indexed by a node to get the mapped class. class_start is
- * biased to all solutions in the class. */
-
- unsigned class_count;
- unsigned *class;
- unsigned *class_start;
- unsigned *class_size;
- bool *class_disjoint;
-
- /* Before solving, forced registers; after solving, solutions. */
- unsigned *solutions;
-
- /* For register spilling, the costs to spill nodes (as set by the user)
- * are in spill_cost[], negative if a node is unspillable. Internally,
- * spill_class specifies which class to spill (whichever class failed
- * to allocate) */
-
- signed *spill_cost;
- unsigned spill_class;
+ unsigned node_count;
+
+ /* Alignment for node in log2(bytes)+1. Since alignment must be
+ * non-negative power-of-two, the elements are strictly positive
+ * integers. Zero is the sentinel for a missing node. In upper word,
+ * bound. */
+ unsigned *alignment;
+
+ /* Linear constraints imposed. Nested array sized upfront, organized as
+ * linear[node_left][node_right]. That is, calculate indices as:
+ *
+ * Each element is itself a bit field denoting whether (c_j - c_i) bias
+ * is present or not, including negative biases.
+ *
+ * Note for Midgard, there are 16 components so the bias is in range
+ * [-15, 15] so encoded by 32-bit field. */
+
+ uint32_t *linear;
+
+ /* Per node max modulus constraints */
+ uint8_t *modulus;
+
+ /* Classes allow nodes to be partitioned with a starting register.
+ * Classes cannot interfere; that is, they are true partitions in the
+ * usual sense of the word. class_count is the number of classes.
+ * class[] is indexed by a node to get the mapped class. class_start is
+ * biased to all solutions in the class. */
+
+ unsigned class_count;
+ unsigned *class;
+ unsigned *class_start;
+ unsigned *class_size;
+ bool *class_disjoint;
+
+ /* Before solving, forced registers; after solving, solutions. */
+ unsigned *solutions;
+
+ /* For register spilling, the costs to spill nodes (as set by the user)
+ * are in spill_cost[], negative if a node is unspillable. Internally,
+ * spill_class specifies which class to spill (whichever class failed
+ * to allocate) */
+
+ signed *spill_cost;
+ unsigned spill_class;
};
-struct lcra_state *
-lcra_alloc_equations(
- unsigned node_count, unsigned class_count);
+struct lcra_state *lcra_alloc_equations(unsigned node_count,
+ unsigned class_count);
-void
-lcra_free(struct lcra_state *l);
+void lcra_free(struct lcra_state *l);
-void
-lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2);
+void lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2);
-void
-lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2, unsigned bound);
+void lcra_set_alignment(struct lcra_state *l, unsigned node,
+ unsigned align_log2, unsigned bound);
-void
-lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len);
+void lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len);
-void
-lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, unsigned j, unsigned cmask_j);
+void lcra_add_node_interference(struct lcra_state *l, unsigned i,
+ unsigned cmask_i, unsigned j, unsigned cmask_j);
-bool
-lcra_solve(struct lcra_state *l);
+bool lcra_solve(struct lcra_state *l);
-void
-lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost);
+void lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost);
-signed
-lcra_get_best_spill_node(struct lcra_state *l);
+signed lcra_get_best_spill_node(struct lcra_state *l);
#endif
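
To see how the pieces above fit together, here is a minimal sketch of driving the LCRA interface. The function name demo_allocate, the single class of sixteen 16-byte registers, and the all-pairs interference are illustrative assumptions; real backends derive classes and byte masks from their own IR and liveness analysis.

   #include <assert.h>
   #include "lcra.h"

   /* Hypothetical driver: allocate node_count vec4 (16-byte) nodes in one
    * class of sixteen 16-byte registers, assuming every pair of nodes
    * interferes on all bytes. */
   static bool
   demo_allocate(unsigned node_count, unsigned *out)
   {
      struct lcra_state *l = lcra_alloc_equations(node_count, 1);
      l->class_start[0] = 0;
      l->class_size[0] = 16 * 16;

      for (unsigned i = 0; i < node_count; ++i) {
         l->class[i] = 0;
         lcra_set_alignment(l, i, 4, 16 * 16); /* 2^4 = 16-byte alignment, bound 256 */
         lcra_restrict_range(l, i, 16);        /* each node spans 16 bytes */
         lcra_set_node_spill_cost(l, i, 1);

         for (unsigned j = 0; j < i; ++j)
            lcra_add_node_interference(l, i, 0xFFFF, j, 0xFFFF);
      }

      bool ok = lcra_solve(l);

      if (ok) {
         for (unsigned i = 0; i < node_count; ++i)
            out[i] = l->solutions[i]; /* byte offset assigned to node i */
      } else {
         /* Out of registers: pick a victim, spill it, rebuild and retry */
         signed spill = lcra_get_best_spill_node(l);
         assert(spill >= 0);
      }

      lcra_free(l);
      return ok;
   }

With these parameters the solve succeeds for up to 16 nodes (one per register) and flags the class for spilling beyond that.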
diff --git a/src/panfrost/util/meson.build b/src/panfrost/util/meson.build
index 75f9024eafb..d2d730fa587 100644
--- a/src/panfrost/util/meson.build
+++ b/src/panfrost/util/meson.build
@@ -22,22 +22,24 @@
libpanfrost_util_files = files(
'lcra.c',
'lcra.h',
- 'nir_mod_helpers.c',
+ 'pan_collect_varyings.c',
'pan_ir.c',
'pan_ir.h',
'pan_liveness.c',
'pan_lower_framebuffer.c',
'pan_lower_helper_invocation.c',
+ 'pan_lower_image_ms.c',
+ 'pan_lower_image_index.c',
'pan_lower_sample_position.c',
+ 'pan_lower_store_component.c',
'pan_lower_writeout.c',
- 'pan_lower_64bit_intrin.c',
- 'pan_sysval.c',
+ 'pan_lower_xfb.c',
)
libpanfrost_util = static_library(
'panfrost_util',
[libpanfrost_util_files],
- include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_panfrost_hw],
+ include_directories : [inc_include, inc_src, inc_panfrost_hw],
dependencies: [idep_nir],
c_args : [no_override_init_args],
gnu_symbol_visibility : 'hidden',
diff --git a/src/panfrost/util/nir_mod_helpers.c b/src/panfrost/util/nir_mod_helpers.c
deleted file mode 100644
index 2fe7b4fabe5..00000000000
--- a/src/panfrost/util/nir_mod_helpers.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (C) 2020 Collabora, Ltd.
- * Copyright (C) 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Alyssa Rosenzweig <alyssa@collabora.com>
- * Jason Ekstrand (jason@jlekstrand.net)
- *
- */
-
-#include "nir.h"
-#include "pan_ir.h"
-
-/* Check if a given ALU source is the result of a particular componentwise 1-op
- * ALU source (principally fneg or fabs). If so, return true and rewrite the
- * source to be the argument, respecting swizzles as needed. If not (or it
- * cannot be proven), return false and leave the source untouched.
-*/
-
-bool
-pan_has_source_mod(nir_alu_src *src, nir_op op)
-{
- if (!src->src.is_ssa || src->src.ssa->parent_instr->type != nir_instr_type_alu)
- return false;
-
- nir_alu_instr *alu = nir_instr_as_alu(src->src.ssa->parent_instr);
-
- if (alu->op != op)
- return false;
-
- /* This only works for unary ops */
- assert(nir_op_infos[op].num_inputs == 1);
-
- /* If the copied source is not SSA, moving it might not be valid */
- if (!alu->src[0].src.is_ssa)
- return false;
-
- /* Okay - we've found the modifier we wanted. Let's construct the new ALU
- * src. In a scalar world, this is just psrc, but for vector archs we need
- * to respect the swizzle, so we compose.
- */
-
- nir_alu_src nsrc = {
- .src = alu->src[0].src,
- };
-
- for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i) {
- /* (a o b)(i) = a(b(i)) ... swizzle composition is intense. */
- nsrc.swizzle[i] = alu->src[0].swizzle[src->swizzle[i]];
- }
-
- *src = nsrc;
- return true;
-}
-
-/* Check if a given instruction's result will be fed into a
- * componentwise 1-op ALU instruction (principally fsat without
- * swizzles). If so, return true and rewrite the destination. The
- * backend will need to track the new destinations to avoid
- * incorrect double-emits. */
-
-bool
-pan_has_dest_mod(nir_dest **odest, nir_op op)
-{
- /* This only works for unary ops */
- assert(nir_op_infos[op].num_inputs == 1);
-
- /* If not SSA, this might not be legal */
- nir_dest *dest = *odest;
- if (!dest->is_ssa)
- return false;
-
- /* Check the uses. We want a single use, with the op `op` */
- if (!list_is_empty(&dest->ssa.if_uses))
- return false;
-
- if (!list_is_singular(&dest->ssa.uses))
- return false;
-
- nir_src *use = list_first_entry(&dest->ssa.uses, nir_src, use_link);
- nir_instr *parent = use->parent_instr;
-
- /* Check if the op is `op` */
- if (parent->type != nir_instr_type_alu)
- return false;
-
- nir_alu_instr *alu = nir_instr_as_alu(parent);
- if (alu->op != op)
- return false;
-
- /* We can't do expansions without a move in the middle */
- unsigned nr_components = nir_dest_num_components(alu->dest.dest);
-
- if (nir_dest_num_components(*dest) != nr_components)
- return false;
-
- /* We don't handle swizzles here, so check for the identity */
- for (unsigned i = 0; i < nr_components; ++i) {
- if (alu->src[0].swizzle[i] != i)
- return false;
- }
-
- if (!alu->dest.dest.is_ssa)
- return false;
-
- /* Otherwise, we're good */
- *odest = &alu->dest.dest;
- return true;
-}
diff --git a/src/panfrost/util/pan_collect_varyings.c b/src/panfrost/util/pan_collect_varyings.c
new file mode 100644
index 00000000000..0134ecfb67b
--- /dev/null
+++ b/src/panfrost/util/pan_collect_varyings.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
+ * Copyright (C) 2019-2022 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler/nir/nir.h"
+#include "compiler/nir/nir_builder.h"
+#include "pan_ir.h"
+
+static enum pipe_format
+varying_format(nir_alu_type t, unsigned ncomps)
+{
+ assert(ncomps >= 1 && ncomps <= 4);
+
+#define VARYING_FORMAT(ntype, nsz, ptype, psz) \
+ { \
+ .type = nir_type_##ntype##nsz, .formats = { \
+ PIPE_FORMAT_R##psz##_##ptype, \
+ PIPE_FORMAT_R##psz##G##psz##_##ptype, \
+ PIPE_FORMAT_R##psz##G##psz##B##psz##_##ptype, \
+ PIPE_FORMAT_R##psz##G##psz##B##psz##A##psz##_##ptype, \
+ } \
+ }
+
+ static const struct {
+ nir_alu_type type;
+ enum pipe_format formats[4];
+ } conv[] = {
+ VARYING_FORMAT(float, 32, FLOAT, 32),
+ VARYING_FORMAT(uint, 32, UINT, 32),
+ VARYING_FORMAT(float, 16, FLOAT, 16),
+ };
+#undef VARYING_FORMAT
+
+ assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats));
+
+ for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) {
+ if (conv[i].type == t)
+ return conv[i].formats[ncomps - 1];
+ }
+
+ unreachable("Invalid type");
+}
+
+struct slot_info {
+ nir_alu_type type;
+ unsigned count;
+ unsigned index;
+};
+
+static bool
+walk_varyings(UNUSED nir_builder *b, nir_instr *instr, void *data)
+{
+ struct slot_info *slots = data;
+
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ unsigned count;
+
+ /* Only consider intrinsics that access varyings */
+ switch (intr->intrinsic) {
+ case nir_intrinsic_store_output:
+ if (b->shader->info.stage != MESA_SHADER_VERTEX)
+ return false;
+
+ count = nir_src_num_components(intr->src[0]);
+ break;
+
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_interpolated_input:
+ if (b->shader->info.stage != MESA_SHADER_FRAGMENT)
+ return false;
+
+ count = intr->def.num_components;
+ break;
+
+ default:
+ return false;
+ }
+
+ nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+
+ if (sem.no_varying)
+ return false;
+
+ /* In a fragment shader, flat shading is lowered to load_input but
+ * interpolation is lowered to load_interpolated_input, so we can check
+ * the intrinsic to distinguish.
+ *
+ * In a vertex shader, we consider everything flat, as the information
+ * will not contribute to the final linked varyings -- flatness is used
+ * only to determine the type, and the GL linker uses the type from the
+ * fragment shader instead.
+ */
+ bool flat = (intr->intrinsic != nir_intrinsic_load_interpolated_input);
+ nir_alu_type type = flat ? nir_type_uint : nir_type_float;
+
+ /* Demote interpolated float varyings to fp16 where possible. We do not
+ * demote flat varyings, including integer varyings, due to various
+ * issues with the Midgard hardware behaviour and TGSI shaders, as well
+ * as having no demonstrable benefit in practice.
+ */
+ if (type == nir_type_float && sem.medium_precision)
+ type |= 16;
+ else
+ type |= 32;
+
+ /* Count currently contains the number of components accessed by this
+ * intrinsic. However, we may be accessing a fractional location,
+ * indicated by the NIR component. Add that in. The final value is the
+ * maximum (component + count), an upper bound on the number of
+ * components possibly used.
+ */
+ count += nir_intrinsic_component(intr);
+
+ /* Consider each slot separately */
+ for (unsigned offset = 0; offset < sem.num_slots; ++offset) {
+ unsigned location = sem.location + offset;
+ unsigned index = pan_res_handle_get_index(nir_intrinsic_base(intr)) + offset;
+
+ if (slots[location].type) {
+ assert(slots[location].type == type);
+ assert(slots[location].index == index);
+ } else {
+ slots[location].type = type;
+ slots[location].index = index;
+ }
+
+ slots[location].count = MAX2(slots[location].count, count);
+ }
+
+ return false;
+}
+
+void
+pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info)
+{
+ if (s->info.stage != MESA_SHADER_VERTEX &&
+ s->info.stage != MESA_SHADER_FRAGMENT)
+ return;
+
+ struct slot_info slots[64] = {0};
+ nir_shader_instructions_pass(s, walk_varyings, nir_metadata_all, slots);
+
+ struct pan_shader_varying *varyings = (s->info.stage == MESA_SHADER_VERTEX)
+ ? info->varyings.output
+ : info->varyings.input;
+
+ unsigned count = 0;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(slots); ++i) {
+ if (!slots[i].type)
+ continue;
+
+ enum pipe_format format = varying_format(slots[i].type, slots[i].count);
+ assert(format != PIPE_FORMAT_NONE);
+
+ unsigned index = slots[i].index;
+ count = MAX2(count, index + 1);
+
+ varyings[index].location = i;
+ varyings[index].format = format;
+ }
+
+ if (s->info.stage == MESA_SHADER_VERTEX)
+ info->varyings.output_count = count;
+ else
+ info->varyings.input_count = count;
+}
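
As a concrete trace of the pass above (hypothetical values): a load_interpolated_input of a mediump vec2 starting at component 2 of a slot records type nir_type_float16 and count = 2 components + 2 (start component) = 4, so the slot later maps through varying_format(nir_type_float16, 4) to PIPE_FORMAT_R16G16B16A16_FLOAT; a flat integer input of the same shape takes the nir_type_uint32 path instead and lands on PIPE_FORMAT_R32G32B32A32_UINT.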
diff --git a/src/panfrost/util/pan_ir.c b/src/panfrost/util/pan_ir.c
index c469274933f..8524e08bc84 100644
--- a/src/panfrost/util/pan_ir.c
+++ b/src/panfrost/util/pan_ir.c
@@ -32,73 +32,66 @@
uint16_t
pan_to_bytemask(unsigned bytes, unsigned mask)
{
- switch (bytes) {
- case 0:
- assert(mask == 0);
- return 0;
-
- case 8:
- return mask;
-
- case 16: {
- unsigned space =
- (mask & 0x1) |
- ((mask & 0x2) << (2 - 1)) |
- ((mask & 0x4) << (4 - 2)) |
- ((mask & 0x8) << (6 - 3)) |
- ((mask & 0x10) << (8 - 4)) |
- ((mask & 0x20) << (10 - 5)) |
- ((mask & 0x40) << (12 - 6)) |
- ((mask & 0x80) << (14 - 7));
-
- return space | (space << 1);
- }
-
- case 32: {
- unsigned space =
- (mask & 0x1) |
- ((mask & 0x2) << (4 - 1)) |
- ((mask & 0x4) << (8 - 2)) |
- ((mask & 0x8) << (12 - 3));
-
- return space | (space << 1) | (space << 2) | (space << 3);
- }
-
- case 64: {
- unsigned A = (mask & 0x1) ? 0xFF : 0x00;
- unsigned B = (mask & 0x2) ? 0xFF : 0x00;
- return A | (B << 8);
- }
-
- default:
- unreachable("Invalid register mode");
- }
+ switch (bytes) {
+ case 0:
+ assert(mask == 0);
+ return 0;
+
+ case 8:
+ return mask;
+
+ case 16: {
+ unsigned space =
+ (mask & 0x1) | ((mask & 0x2) << (2 - 1)) | ((mask & 0x4) << (4 - 2)) |
+ ((mask & 0x8) << (6 - 3)) | ((mask & 0x10) << (8 - 4)) |
+ ((mask & 0x20) << (10 - 5)) | ((mask & 0x40) << (12 - 6)) |
+ ((mask & 0x80) << (14 - 7));
+
+ return space | (space << 1);
+ }
+
+ case 32: {
+ unsigned space = (mask & 0x1) | ((mask & 0x2) << (4 - 1)) |
+ ((mask & 0x4) << (8 - 2)) | ((mask & 0x8) << (12 - 3));
+
+ return space | (space << 1) | (space << 2) | (space << 3);
+ }
+
+ case 64: {
+ unsigned A = (mask & 0x1) ? 0xFF : 0x00;
+ unsigned B = (mask & 0x2) ? 0xFF : 0x00;
+ return A | (B << 8);
+ }
+
+ default:
+ unreachable("Invalid register mode");
+ }
}
void
pan_block_add_successor(pan_block *block, pan_block *successor)
{
- assert(block);
- assert(successor);
-
- /* Cull impossible edges */
- if (block->unconditional_jumps)
- return;
-
- for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
- if (block->successors[i]) {
- if (block->successors[i] == successor)
- return;
- else
- continue;
- }
-
- block->successors[i] = successor;
- _mesa_set_add(successor->predecessors, block);
- return;
- }
-
- unreachable("Too many successors");
+ assert(block);
+ assert(successor);
+
+ /* Cull impossible edges */
+ if (block->unconditional_jumps)
+ return;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
+ if (block->successors[i]) {
+ if (block->successors[i] == successor)
+ return;
+ else
+ continue;
+ }
+
+ block->successors[i] = successor;
+ _mesa_set_add(successor->predecessors, block);
+ return;
+ }
+
+ unreachable("Too many successors");
}
/* Prints a NIR ALU type in Bifrost-style ".f32" ".i8" etc */
@@ -106,45 +99,42 @@ pan_block_add_successor(pan_block *block, pan_block *successor)
void
pan_print_alu_type(nir_alu_type t, FILE *fp)
{
- unsigned size = nir_alu_type_get_type_size(t);
- nir_alu_type base = nir_alu_type_get_base_type(t);
-
- switch (base) {
- case nir_type_int:
- fprintf(fp, ".i");
- break;
- case nir_type_uint:
- fprintf(fp, ".u");
- break;
- case nir_type_bool:
- fprintf(fp, ".b");
- break;
- case nir_type_float:
- fprintf(fp, ".f");
- break;
- default:
- fprintf(fp, ".unknown");
- break;
- }
-
- fprintf(fp, "%u", size);
+ unsigned size = nir_alu_type_get_type_size(t);
+ nir_alu_type base = nir_alu_type_get_base_type(t);
+
+ switch (base) {
+ case nir_type_int:
+ fprintf(fp, ".i");
+ break;
+ case nir_type_uint:
+ fprintf(fp, ".u");
+ break;
+ case nir_type_bool:
+ fprintf(fp, ".b");
+ break;
+ case nir_type_float:
+ fprintf(fp, ".f");
+ break;
+ default:
+ fprintf(fp, ".unknown");
+ break;
+ }
+
+ fprintf(fp, "%u", size);
}
/* Could optimize with a better data structure if anyone cares, TODO: profile */
unsigned
-pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs)
+pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo,
+ unsigned offs)
{
- struct panfrost_ubo_word word = {
- .ubo = ubo,
- .offset = offs
- };
+ struct panfrost_ubo_word word = {.ubo = ubo, .offset = offs};
- for (unsigned i = 0; i < push->count; ++i) {
- if (memcmp(push->words + i, &word, sizeof(word)) == 0)
- return i;
- }
-
- unreachable("UBO not pushed");
+ for (unsigned i = 0; i < push->count; ++i) {
+ if (memcmp(push->words + i, &word, sizeof(word)) == 0)
+ return i;
+ }
+ unreachable("UBO not pushed");
}
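
A few worked values for the byte-mask expansion above, as a sanity sketch:

   assert(pan_to_bytemask(32, 0x3) == 0x00FF); /* .xy of a 32-bit vec -> bytes 0-7  */
   assert(pan_to_bytemask(16, 0xF) == 0x00FF); /* .xyzw of a 16-bit vec -> bytes 0-7 */
   assert(pan_to_bytemask(64, 0x2) == 0xFF00); /* .y of a 64-bit pair -> bytes 8-15  */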
diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h
index 1ff240b3070..7b24fb6922e 100644
--- a/src/panfrost/util/pan_ir.h
+++ b/src/panfrost/util/pan_ir.h
@@ -26,326 +26,420 @@
#include <stdint.h>
#include "compiler/nir/nir.h"
-#include "util/u_dynarray.h"
#include "util/hash_table.h"
+#include "util/u_dynarray.h"
-/* Define the general compiler entry point */
-
-#define MAX_SYSVAL_COUNT 32
-
-/* Allow 2D of sysval IDs, while allowing nonparametric sysvals to equal
- * their class for equal comparison */
-
-#define PAN_SYSVAL(type, no) (((no) << 16) | PAN_SYSVAL_##type)
-#define PAN_SYSVAL_TYPE(sysval) ((sysval) & 0xffff)
-#define PAN_SYSVAL_ID(sysval) ((sysval) >> 16)
-
-/* Define some common types. We start at one for easy indexing of hash
- * tables internal to the compiler */
-
-enum {
- PAN_SYSVAL_VIEWPORT_SCALE = 1,
- PAN_SYSVAL_VIEWPORT_OFFSET = 2,
- PAN_SYSVAL_TEXTURE_SIZE = 3,
- PAN_SYSVAL_SSBO = 4,
- PAN_SYSVAL_NUM_WORK_GROUPS = 5,
- PAN_SYSVAL_SAMPLER = 7,
- PAN_SYSVAL_LOCAL_GROUP_SIZE = 8,
- PAN_SYSVAL_WORK_DIM = 9,
- PAN_SYSVAL_IMAGE_SIZE = 10,
- PAN_SYSVAL_SAMPLE_POSITIONS = 11,
- PAN_SYSVAL_MULTISAMPLED = 12,
- PAN_SYSVAL_RT_CONVERSION = 13,
- PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS = 14,
- PAN_SYSVAL_DRAWID = 15,
-};
+/* Indices for named (non-XFB) varyings that are present. These are packed
+ * tightly so they correspond to a bitfield present (P) indexed by (1 <<
+ * PAN_VARY_*). This has the nice property that you can lookup the buffer index
+ * of a given special field given a shift S by:
+ *
+ * idx = popcount(P & ((1 << S) - 1))
+ *
+ * That is... look at all of the varyings that come earlier and count them; the
+ * count is the new index. Likewise, the total number of special
+ * buffers required is simply popcount(P)
+ */
+
+enum pan_special_varying {
+ PAN_VARY_GENERAL = 0,
+ PAN_VARY_POSITION = 1,
+ PAN_VARY_PSIZ = 2,
+ PAN_VARY_PNTCOORD = 3,
+ PAN_VARY_FACE = 4,
+ PAN_VARY_FRAGCOORD = 5,
-#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array) \
- ((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0))
+ /* Keep last */
+ PAN_VARY_MAX,
+};
-#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id) ((id) & 0x7f)
-#define PAN_SYSVAL_ID_TO_TXS_DIM(id) (((id) >> 7) & 0x3)
-#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id) !!((id) & (1 << 9))
+/* Maximum number of attribute descriptors required for varyings. These include
+ * up to MAX_VARYING source-level varyings plus a descriptor for each non-GENERAL
+ * special varying */
+#define PAN_MAX_VARYINGS (MAX_VARYING + PAN_VARY_MAX - 1)
/* Special attribute slots for vertex builtins. Sort of arbitrary but let's be
* consistent with the blob so we can compare traces easier. */
-enum {
- PAN_VERTEX_ID = 16,
- PAN_INSTANCE_ID = 17,
- PAN_MAX_ATTRIBUTE
-};
+enum { PAN_VERTEX_ID = 16, PAN_INSTANCE_ID = 17, PAN_MAX_ATTRIBUTE };
-struct panfrost_sysvals {
- /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
- unsigned sysvals[MAX_SYSVAL_COUNT];
- unsigned sysval_count;
-};
-
-/* Technically Midgard could go up to 92 in a pathological case but we don't
- * take advantage of that. Likewise Bifrost's FAU encoding can address 128
- * words but actual implementations (G72, G76) are capped at 64 */
-
-#define PAN_MAX_PUSH 64
+/* Architecturally, Bifrost/Valhall can address 128 FAU slots of 64-bits each.
+ * In practice, the maximum number of FAU slots is limited by implementation.
+ * All known Bifrost and Valhall devices limit to 64 FAU slots. Therefore the
+ * maximum number of 32-bit words is 128, since there are 2 words per FAU slot.
+ *
+ * Midgard can push at most 92 words, so this bound suffices. The Midgard
+ * compiler pushes less than this, as Midgard uses register-mapped uniforms
+ * instead of FAU, preventing large numbers of uniforms from being pushed for
+ * nontrivial programs.
+ */
+#define PAN_MAX_PUSH 128
/* Architectural invariants (Midgard and Bifrost): UBO must be <= 2^16 bytes so
* an offset to a word must be < 2^16. There are less than 2^8 UBOs */
struct panfrost_ubo_word {
- uint16_t ubo;
- uint16_t offset;
+ uint16_t ubo;
+ uint16_t offset;
};
struct panfrost_ubo_push {
- unsigned count;
- struct panfrost_ubo_word words[PAN_MAX_PUSH];
+ unsigned count;
+ struct panfrost_ubo_word words[PAN_MAX_PUSH];
};
/* Helper for searching the above. Note this is O(N) to the number of pushed
* constants, do not run in the draw call hot path */
-unsigned
-pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs);
-
-struct hash_table_u64 *
-panfrost_init_sysvals(struct panfrost_sysvals *sysvals, void *memctx);
-
-unsigned
-pan_lookup_sysval(struct hash_table_u64 *sysval_to_id,
- struct panfrost_sysvals *sysvals,
- int sysval);
-
-int
-panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest);
+unsigned pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo,
+ unsigned offs);
struct panfrost_compile_inputs {
- unsigned gpu_id;
- bool is_blend, is_blit;
- struct {
- unsigned rt;
- unsigned nr_samples;
- uint64_t bifrost_blend_desc;
- } blend;
- unsigned sysval_ubo;
- bool shaderdb;
- bool no_ubo_to_push;
-
- enum pipe_format rt_formats[8];
- unsigned nr_cbufs;
+ struct util_debug_callback *debug;
+
+ unsigned gpu_id;
+ bool is_blend, is_blit;
+ struct {
+ unsigned nr_samples;
+ uint64_t bifrost_blend_desc;
+ } blend;
+ bool no_idvs;
+ bool no_ubo_to_push;
+
+ /* Used on Valhall.
+ *
+ * Bit mask of special desktop-only varyings (e.g VARYING_SLOT_TEX0)
+ * written by the previous stage (fragment shader) or written by this
+ * stage (vertex shader). Bits are slots from gl_varying_slot.
+ *
+ * For modern APIs (GLES or VK), this should be 0.
+ */
+ uint32_t fixed_varying_mask;
+
+ union {
+ struct {
+ uint32_t rt_conv[8];
+ } bifrost;
+ };
};
struct pan_shader_varying {
- gl_varying_slot location;
- enum pipe_format format;
+ gl_varying_slot location;
+ enum pipe_format format;
};
struct bifrost_shader_blend_info {
- nir_alu_type type;
- uint32_t return_offset;
+ nir_alu_type type;
+ uint32_t return_offset;
- /* mali_bifrost_register_file_format corresponding to nir_alu_type */
- unsigned format;
+ /* mali_bifrost_register_file_format corresponding to nir_alu_type */
+ unsigned format;
};
-struct bifrost_shader_info {
- struct bifrost_shader_blend_info blend[8];
- nir_alu_type blend_src1_type;
- bool wait_6, wait_7;
+/*
+ * Unpacked form of a v7 message preload descriptor, produced by the compiler's
+ * message preload optimization. By splitting out this struct, the compiler does
+ * not need to know about data structure packing, avoiding a dependency on
+ * GenXML.
+ */
+struct bifrost_message_preload {
+ /* Whether to preload this message */
+ bool enabled;
+
+ /* Varying to load from */
+ unsigned varying_index;
+
+ /* Register type, FP32 otherwise */
+ bool fp16;
- /* Packed, preloaded message descriptors */
- uint16_t messages[2];
+ /* Number of components, ignored if texturing */
+ unsigned num_components;
+
+ /* If texture is set, performs a texture instruction according to
+ * texture_index, skip, and zero_lod. If texture is unset, only the
+ * varying load is performed.
+ */
+ bool texture, skip, zero_lod;
+ unsigned texture_index;
+};
+
+struct bifrost_shader_info {
+ struct bifrost_shader_blend_info blend[8];
+ nir_alu_type blend_src1_type;
+ bool wait_6, wait_7;
+ struct bifrost_message_preload messages[2];
+
+ /* Whether any flat varyings are loaded. This may disable optimizations
+ * that change the provoking vertex, since that would load incorrect
+ * values for flat varyings.
+ */
+ bool uses_flat_shading;
};
struct midgard_shader_info {
- unsigned first_tag;
+ unsigned first_tag;
};
struct pan_shader_info {
- gl_shader_stage stage;
- unsigned work_reg_count;
- unsigned tls_size;
- unsigned wls_size;
-
- union {
- struct {
- bool reads_frag_coord;
- bool reads_point_coord;
- bool reads_face;
- bool helper_invocations;
- bool can_discard;
- bool writes_depth;
- bool writes_stencil;
- bool writes_coverage;
- bool sidefx;
- bool reads_sample_id;
- bool reads_sample_pos;
- bool reads_sample_mask_in;
- bool reads_helper_invocation;
- bool sample_shading;
- bool early_fragment_tests;
- bool can_early_z, can_fpk;
- BITSET_WORD outputs_read;
- BITSET_WORD outputs_written;
- } fs;
-
- struct {
- bool writes_point_size;
- } vs;
- };
-
- bool separable;
- bool contains_barrier;
- bool writes_global;
- uint64_t outputs_written;
-
- unsigned sampler_count;
- unsigned texture_count;
- unsigned ubo_count;
- unsigned attribute_count;
-
- struct {
- unsigned input_count;
- struct pan_shader_varying input[MAX_VARYING];
- unsigned output_count;
- struct pan_shader_varying output[MAX_VARYING];
- } varyings;
-
- struct panfrost_sysvals sysvals;
-
- /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
- * Uniforms (Bifrost) */
- struct panfrost_ubo_push push;
-
- uint32_t ubo_mask;
-
- union {
- struct bifrost_shader_info bifrost;
- struct midgard_shader_info midgard;
- };
+ gl_shader_stage stage;
+ unsigned work_reg_count;
+ unsigned tls_size;
+ unsigned wls_size;
+
+ /* Bit mask of preloaded registers */
+ uint64_t preload;
+
+ union {
+ struct {
+ bool reads_frag_coord;
+ bool reads_point_coord;
+ bool reads_face;
+ bool can_discard;
+ bool writes_depth;
+ bool writes_stencil;
+ bool writes_coverage;
+ bool sidefx;
+ bool sample_shading;
+ bool early_fragment_tests;
+ bool can_early_z, can_fpk;
+ bool untyped_color_outputs;
+ BITSET_WORD outputs_read;
+ BITSET_WORD outputs_written;
+ } fs;
+
+ struct {
+ bool writes_point_size;
+
+ /* If the primary shader writes point size, the Valhall
+ * driver may need a variant that does not write point
+ * size. Offset to such a shader in the program binary.
+ *
+ * Zero if no such variant is required.
+ *
+ * Only used with IDVS on Valhall.
+ */
+ unsigned no_psiz_offset;
+
+ /* Set if Index-Driven Vertex Shading is in use */
+ bool idvs;
+
+ /* If IDVS is used, whether a varying shader is used */
+ bool secondary_enable;
+
+ /* If a varying shader is used, the varying shader's
+ * offset in the program binary
+ */
+ unsigned secondary_offset;
+
+ /* If IDVS is in use, number of work registers used by
+ * the varying shader
+ */
+ unsigned secondary_work_reg_count;
+
+ /* If IDVS is in use, bit mask of preloaded registers
+ * used by the varying shader
+ */
+ uint64_t secondary_preload;
+ } vs;
+
+ struct {
+ /* Is it legal to merge workgroups? This is true if the
+ * shader uses neither barriers nor shared memory. This
+ * requires caution: if the API allows specifying shared
+ * memory at launch time (instead of compile time), that
+ * memory will not be accounted for by the compiler.
+ *
+ * Used by the Valhall hardware.
+ */
+ bool allow_merging_workgroups;
+ } cs;
+ };
+
+ /* Does the shader contain a barrier, or (for fragment shaders) does it
+ * require helper invocations, which demand the same ordering guarantees
+ * of the hardware? These notions are unified in the hardware, so we
+ * unify them here as well.
+ */
+ bool contains_barrier;
+ bool separable;
+ bool writes_global;
+ uint64_t outputs_written;
+
+ /* Floating point controls that the driver should try to honour */
+ bool ftz_fp16, ftz_fp32;
+
+ unsigned sampler_count;
+ unsigned texture_count;
+ unsigned ubo_count;
+ unsigned attributes_read_count;
+ unsigned attribute_count;
+ unsigned attributes_read;
+
+ struct {
+ unsigned input_count;
+ struct pan_shader_varying input[PAN_MAX_VARYINGS];
+ unsigned output_count;
+ struct pan_shader_varying output[PAN_MAX_VARYINGS];
+ } varyings;
+
+ /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
+ * Uniforms (Bifrost) */
+ struct panfrost_ubo_push push;
+
+ uint32_t ubo_mask;
+
+ union {
+ struct bifrost_shader_info bifrost;
+ struct midgard_shader_info midgard;
+ };
};
typedef struct pan_block {
- /* Link to next block. Must be first for mir_get_block */
- struct list_head link;
+ /* Link to next block. Must be first for mir_get_block */
+ struct list_head link;
- /* List of instructions emitted for the current block */
- struct list_head instructions;
+ /* List of instructions emitted for the current block */
+ struct list_head instructions;
- /* Index of the block in source order */
- unsigned name;
+ /* Index of the block in source order */
+ unsigned name;
- /* Control flow graph */
- struct pan_block *successors[2];
- struct set *predecessors;
- bool unconditional_jumps;
+ /* Control flow graph */
+ struct pan_block *successors[2];
+ struct set *predecessors;
+ bool unconditional_jumps;
- /* In liveness analysis, these are live masks (per-component) for
- * indices for the block. Scalar compilers have the luxury of using
- * simple bit fields, but for us, liveness is a vector idea. */
- uint16_t *live_in;
- uint16_t *live_out;
+ /* In liveness analysis, these are live masks (per-component) for
+ * indices for the block. Scalar compilers have the luxury of using
+ * simple bit fields, but for us, liveness is a vector idea. */
+ uint16_t *live_in;
+ uint16_t *live_out;
} pan_block;
struct pan_instruction {
- struct list_head link;
+ struct list_head link;
};
-#define pan_foreach_instr_in_block_rev(block, v) \
- list_for_each_entry_rev(struct pan_instruction, v, &block->instructions, link)
-
-#define pan_foreach_successor(blk, v) \
- pan_block *v; \
- pan_block **_v; \
- for (_v = (pan_block **) &blk->successors[0], \
- v = *_v; \
- v != NULL && _v < (pan_block **) &blk->successors[2]; \
- _v++, v = *_v) \
-
-#define pan_foreach_predecessor(blk, v) \
- struct set_entry *_entry_##v; \
- struct pan_block *v; \
- for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL), \
- v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL); \
- _entry_##v != NULL; \
- _entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v), \
- v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL))
+#define pan_foreach_instr_in_block_rev(block, v) \
+ list_for_each_entry_rev(struct pan_instruction, v, &block->instructions, \
+ link)
+
+#define pan_foreach_successor(blk, v) \
+ pan_block *v; \
+ pan_block **_v; \
+ for (_v = (pan_block **)&blk->successors[0], v = *_v; \
+ v != NULL && _v < (pan_block **)&blk->successors[2]; _v++, v = *_v)
+
+#define pan_foreach_predecessor(blk, v) \
+ struct set_entry *_entry_##v; \
+ struct pan_block *v; \
+ for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL), \
+ v = (struct pan_block *)(_entry_##v ? _entry_##v->key : NULL); \
+ _entry_##v != NULL; \
+ _entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v), \
+ v = (struct pan_block *)(_entry_##v ? _entry_##v->key : NULL))
static inline pan_block *
pan_exit_block(struct list_head *blocks)
{
- pan_block *last = list_last_entry(blocks, pan_block, link);
- assert(!last->successors[0] && !last->successors[1]);
- return last;
+ pan_block *last = list_last_entry(blocks, pan_block, link);
+ assert(!last->successors[0] && !last->successors[1]);
+ return last;
}
typedef void (*pan_liveness_update)(uint16_t *, void *, unsigned max);
-void pan_liveness_gen(uint16_t *live, unsigned node, unsigned max, uint16_t mask);
-void pan_liveness_kill(uint16_t *live, unsigned node, unsigned max, uint16_t mask);
+void pan_liveness_gen(uint16_t *live, unsigned node, unsigned max,
+ uint16_t mask);
+void pan_liveness_kill(uint16_t *live, unsigned node, unsigned max,
+ uint16_t mask);
bool pan_liveness_get(uint16_t *live, unsigned node, uint16_t max);
-void pan_compute_liveness(struct list_head *blocks,
- unsigned temp_count,
- pan_liveness_update callback);
+void pan_compute_liveness(struct list_head *blocks, unsigned temp_count,
+ pan_liveness_update callback);
void pan_free_liveness(struct list_head *blocks);
-uint16_t
-pan_to_bytemask(unsigned bytes, unsigned mask);
+uint16_t pan_to_bytemask(unsigned bytes, unsigned mask);
void pan_block_add_successor(pan_block *block, pan_block *successor);
/* IR indexing */
#define PAN_IS_REG (1)
-static inline unsigned
-pan_ssa_index(nir_ssa_def *ssa)
-{
- /* Off-by-one ensures BIR_NO_ARG is skipped */
- return ((ssa->index + 1) << 1) | 0;
-}
-
-static inline unsigned
-pan_src_index(nir_src *src)
-{
- if (src->is_ssa)
- return pan_ssa_index(src->ssa);
- else {
- assert(!src->reg.indirect);
- return (src->reg.reg->index << 1) | PAN_IS_REG;
- }
-}
-
-static inline unsigned
-pan_dest_index(nir_dest *dst)
-{
- if (dst->is_ssa)
- return pan_ssa_index(&dst->ssa);
- else {
- assert(!dst->reg.indirect);
- return (dst->reg.reg->index << 1) | PAN_IS_REG;
- }
-}
-
/* IR printing helpers */
void pan_print_alu_type(nir_alu_type t, FILE *fp);
-/* Until it can be upstreamed.. */
-bool pan_has_source_mod(nir_alu_src *src, nir_op op);
-bool pan_has_dest_mod(nir_dest **dest, nir_op op);
-
/* NIR passes to do some backend-specific lowering */
#define PAN_WRITEOUT_C 1
#define PAN_WRITEOUT_Z 2
#define PAN_WRITEOUT_S 4
+#define PAN_WRITEOUT_2 8
-bool pan_nir_reorder_writeout(nir_shader *nir);
bool pan_nir_lower_zs_store(nir_shader *nir);
+bool pan_nir_lower_store_component(nir_shader *shader);
+bool pan_nir_lower_image_ms(nir_shader *shader);
bool pan_nir_lower_64bit_intrin(nir_shader *shader);
bool pan_lower_helper_invocation(nir_shader *shader);
bool pan_lower_sample_pos(nir_shader *shader);
+bool pan_lower_xfb(nir_shader *nir);
+
+bool pan_lower_image_index(nir_shader *shader, unsigned vs_img_attrib_offset);
+
+void pan_nir_collect_varyings(nir_shader *s, struct pan_shader_info *info);
+
+/*
+ * Helper returning the subgroup size. Generally, this is equal to the number of
+ * threads in a warp. For Midgard (including warping models), this returns 1, as
+ * subgroups are not supported.
+ */
+static inline unsigned
+pan_subgroup_size(unsigned arch)
+{
+ if (arch >= 9)
+ return 16;
+ else if (arch >= 7)
+ return 8;
+ else if (arch >= 6)
+ return 4;
+ else
+ return 1;
+}
+
+/*
+ * Helper extracting the table from a given handle of Valhall descriptor model.
+ */
+static inline unsigned
+pan_res_handle_get_table(unsigned handle)
+{
+ unsigned table = handle >> 24;
+
+ assert(table < 64);
+ return table;
+}
+
+/*
+ * Helper returning the index from a given handle of Valhall descriptor model.
+ */
+static inline unsigned
+pan_res_handle_get_index(unsigned handle)
+{
+ return handle & BITFIELD_MASK(24);
+}
+
+/*
+ * Helper creating a handle for the Valhall descriptor model.
+ */
+static inline unsigned
+pan_res_handle(unsigned table, unsigned index)
+{
+ assert(table < 64);
+ assert(index < (1u << 24));
+
+ return (table << 24) | index;
+}
#endif
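
The special-varying indexing and the Valhall resource handles declared above can be exercised with a short, hypothetical snippet (util_bitcount and the BITFIELD_* helpers come from the shared util headers):

   /* Assume GENERAL, PSIZ and FRAGCOORD are the special varyings present */
   uint32_t P = BITFIELD_BIT(PAN_VARY_GENERAL) | BITFIELD_BIT(PAN_VARY_PSIZ) |
                BITFIELD_BIT(PAN_VARY_FRAGCOORD);

   unsigned psiz_buf = util_bitcount(P & BITFIELD_MASK(PAN_VARY_PSIZ));      /* 1 */
   unsigned frag_buf = util_bitcount(P & BITFIELD_MASK(PAN_VARY_FRAGCOORD)); /* 2 */
   unsigned n_bufs   = util_bitcount(P);                                     /* 3 */

   /* Valhall resource handles pack (table, index) into the top 8 / low 24 bits */
   unsigned h = pan_res_handle(2, 17);
   assert(pan_res_handle_get_table(h) == 2);
   assert(pan_res_handle_get_index(h) == 17);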
diff --git a/src/panfrost/util/pan_liveness.c b/src/panfrost/util/pan_liveness.c
index 0ec9652b59d..e299bc29d75 100644
--- a/src/panfrost/util/pan_liveness.c
+++ b/src/panfrost/util/pan_liveness.c
@@ -21,10 +21,10 @@
* SOFTWARE.
*/
-#include "pan_ir.h"
-#include "util/u_memory.h"
#include "util/list.h"
#include "util/set.h"
+#include "util/u_memory.h"
+#include "pan_ir.h"
/* Routines for liveness analysis. Liveness is tracked per byte per node. Per
* byte granularity is necessary for proper handling of int8 */
@@ -32,28 +32,28 @@
void
pan_liveness_gen(uint16_t *live, unsigned node, unsigned max, uint16_t mask)
{
- if (node >= max)
- return;
+ if (node >= max)
+ return;
- live[node] |= mask;
+ live[node] |= mask;
}
void
pan_liveness_kill(uint16_t *live, unsigned node, unsigned max, uint16_t mask)
{
- if (node >= max)
- return;
+ if (node >= max)
+ return;
- live[node] &= ~mask;
+ live[node] &= ~mask;
}
bool
pan_liveness_get(uint16_t *live, unsigned node, uint16_t max)
{
- if (node >= max)
- return false;
+ if (node >= max)
+ return false;
- return live[node];
+ return live[node];
}
/* live_out[s] = sum { p in succ[s] } ( live_in[p] ) */
@@ -61,10 +61,10 @@ pan_liveness_get(uint16_t *live, unsigned node, uint16_t max)
static void
liveness_block_live_out(pan_block *blk, unsigned temp_count)
{
- pan_foreach_successor(blk, succ) {
- for (unsigned i = 0; i < temp_count; ++i)
- blk->live_out[i] |= succ->live_in[i];
- }
+ pan_foreach_successor(blk, succ) {
+ for (unsigned i = 0; i < temp_count; ++i)
+ blk->live_out[i] |= succ->live_in[i];
+ }
}
/* Liveness analysis is a backwards-may dataflow analysis pass. Within a block,
@@ -72,32 +72,30 @@ liveness_block_live_out(pan_block *blk, unsigned temp_count)
* returns whether progress was made. */
static bool
-liveness_block_update(
- pan_block *blk, unsigned temp_count,
- pan_liveness_update callback)
+liveness_block_update(pan_block *blk, unsigned temp_count,
+ pan_liveness_update callback)
{
- bool progress = false;
+ bool progress = false;
- liveness_block_live_out(blk, temp_count);
+ liveness_block_live_out(blk, temp_count);
- uint16_t *live = ralloc_array(blk, uint16_t, temp_count);
- memcpy(live, blk->live_out, temp_count * sizeof(uint16_t));
+ uint16_t *live = ralloc_array(blk, uint16_t, temp_count);
+ memcpy(live, blk->live_out, temp_count * sizeof(uint16_t));
- pan_foreach_instr_in_block_rev(blk, ins)
- callback(live, (void *) ins, temp_count);
+ pan_foreach_instr_in_block_rev(blk, ins)
+ callback(live, (void *)ins, temp_count);
- /* To figure out progress, diff live_in */
+ /* To figure out progress, diff live_in */
- for (unsigned i = 0; (i < temp_count) && !progress; ++i)
- progress |= (blk->live_in[i] != live[i]);
+ for (unsigned i = 0; (i < temp_count) && !progress; ++i)
+ progress |= (blk->live_in[i] != live[i]);
- ralloc_free(blk->live_in);
- blk->live_in = live;
+ ralloc_free(blk->live_in);
+ blk->live_in = live;
- return progress;
+ return progress;
}
-
/* Globally, liveness analysis uses a fixed-point algorithm based on a
* worklist. We initialize a work list with the exit block. We iterate the work
* list to compute live_in from live_out for each block on the work list,
@@ -105,70 +103,66 @@ liveness_block_update(
*/
void
-pan_compute_liveness(
- struct list_head *blocks,
- unsigned temp_count,
- pan_liveness_update callback)
+pan_compute_liveness(struct list_head *blocks, unsigned temp_count,
+ pan_liveness_update callback)
{
- /* Set of pan_block */
- struct set *work_list = _mesa_set_create(NULL,
- _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+ /* Set of pan_block */
+ struct set *work_list =
+ _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
- struct set *visited = _mesa_set_create(NULL,
- _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+ struct set *visited =
+ _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
- /* Free any previous liveness, and allocate */
+ /* Free any previous liveness, and allocate */
- pan_free_liveness(blocks);
+ pan_free_liveness(blocks);
- list_for_each_entry(pan_block, block, blocks, link) {
- block->live_in = rzalloc_array(block, uint16_t, temp_count);
- block->live_out = rzalloc_array(block, uint16_t, temp_count);
- }
+ list_for_each_entry(pan_block, block, blocks, link) {
+ block->live_in = rzalloc_array(block, uint16_t, temp_count);
+ block->live_out = rzalloc_array(block, uint16_t, temp_count);
+ }
- /* Initialize the work list with the exit block */
- struct set_entry *cur;
+ /* Initialize the work list with the exit block */
+ struct set_entry *cur;
- cur = _mesa_set_add(work_list, pan_exit_block(blocks));
+ cur = _mesa_set_add(work_list, pan_exit_block(blocks));
- /* Iterate the work list */
+ /* Iterate the work list */
- do {
- /* Pop off a block */
- pan_block *blk = (struct pan_block *) cur->key;
- _mesa_set_remove(work_list, cur);
+ do {
+ /* Pop off a block */
+ pan_block *blk = (struct pan_block *)cur->key;
+ _mesa_set_remove(work_list, cur);
- /* Update its liveness information */
- bool progress = liveness_block_update(blk, temp_count, callback);
+ /* Update its liveness information */
+ bool progress = liveness_block_update(blk, temp_count, callback);
- /* If we made progress, we need to process the predecessors */
+ /* If we made progress, we need to process the predecessors */
- if (progress || !_mesa_set_search(visited, blk)) {
- pan_foreach_predecessor(blk, pred)
- _mesa_set_add(work_list, pred);
- }
+ if (progress || !_mesa_set_search(visited, blk)) {
+ pan_foreach_predecessor(blk, pred)
+ _mesa_set_add(work_list, pred);
+ }
- _mesa_set_add(visited, blk);
- } while((cur = _mesa_set_next_entry(work_list, NULL)) != NULL);
+ _mesa_set_add(visited, blk);
+ } while ((cur = _mesa_set_next_entry(work_list, NULL)) != NULL);
- _mesa_set_destroy(visited, NULL);
- _mesa_set_destroy(work_list, NULL);
+ _mesa_set_destroy(visited, NULL);
+ _mesa_set_destroy(work_list, NULL);
}
void
pan_free_liveness(struct list_head *blocks)
{
- list_for_each_entry(pan_block, block, blocks, link) {
- if (block->live_in)
- ralloc_free(block->live_in);
+ list_for_each_entry(pan_block, block, blocks, link) {
+ if (block->live_in)
+ ralloc_free(block->live_in);
- if (block->live_out)
- ralloc_free(block->live_out);
+ if (block->live_out)
+ ralloc_free(block->live_out);
- block->live_in = NULL;
- block->live_out = NULL;
- }
+ block->live_in = NULL;
+ block->live_out = NULL;
+ }
}
diff --git a/src/panfrost/util/pan_lower_64bit_intrin.c b/src/panfrost/util/pan_lower_64bit_intrin.c
deleted file mode 100644
index 7c4edcfa9d7..00000000000
--- a/src/panfrost/util/pan_lower_64bit_intrin.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (C) 2020 Icecream95 <ixn@disroot.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "pan_ir.h"
-#include "compiler/nir/nir_builder.h"
-
-/* OpenCL uses 64-bit types for some intrinsic functions, including
- * global_invocation_id(). This could be worked around during conversion to
- * MIR, except that global_invocation_id is a vec3, and the 128-bit registers
- * on Midgard can only hold a 64-bit vec2.
- * Rather than attempting to add hacky 64-bit vec3 support, convert these
- * intrinsics to 32-bit and add a cast back to 64-bit, and rely on NIR not
- * vectorizing back to vec3.
- */
-
-static bool
-nir_lower_64bit_intrin_instr(nir_builder *b, nir_instr *instr, void *data)
-{
- if (instr->type != nir_instr_type_intrinsic)
- return false;
-
- nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-
- switch (intr->intrinsic) {
- case nir_intrinsic_load_global_invocation_id:
- case nir_intrinsic_load_global_invocation_id_zero_base:
- case nir_intrinsic_load_workgroup_id:
- case nir_intrinsic_load_num_workgroups:
- break;
-
- default:
- return false;
- }
-
- if (nir_dest_bit_size(intr->dest) != 64)
- return false;
-
- b->cursor = nir_after_instr(instr);
-
- assert(intr->dest.is_ssa);
- intr->dest.ssa.bit_size = 32;
-
- nir_ssa_def *conv = nir_u2u64(b, &intr->dest.ssa);
-
- nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, conv,
- conv->parent_instr);
-
- return true;
-}
-
-bool
-pan_nir_lower_64bit_intrin(nir_shader *shader)
-{
- return nir_shader_instructions_pass(shader,
- nir_lower_64bit_intrin_instr,
- nir_metadata_block_index | nir_metadata_dominance,
- NULL);
-}
diff --git a/src/panfrost/util/pan_lower_framebuffer.c b/src/panfrost/util/pan_lower_framebuffer.c
index 71795fd9416..96e24683b25 100644
--- a/src/panfrost/util/pan_lower_framebuffer.c
+++ b/src/panfrost/util/pan_lower_framebuffer.c
@@ -47,12 +47,11 @@
* smallest precision necessary to store the pixel losslessly.
*/
+#include "pan_lower_framebuffer.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "util/format/u_format.h"
-#include "pan_lower_framebuffer.h"
-#include "panfrost-quirks.h"
/* Determines the unpacked type best suiting a given format, so the rest of the
* pipeline may be adjusted accordingly */
@@ -60,86 +59,54 @@
nir_alu_type
pan_unpacked_type_for_format(const struct util_format_description *desc)
{
- int c = util_format_get_first_non_void_channel(desc->format);
-
- if (c == -1)
- unreachable("Void format not renderable");
-
- bool large = (desc->channel[c].size > 16);
- bool large_norm = (desc->channel[c].size > 8);
- bool bit8 = (desc->channel[c].size == 8);
- assert(desc->channel[c].size <= 32);
-
- if (desc->channel[c].normalized)
- return large_norm ? nir_type_float32 : nir_type_float16;
-
- switch (desc->channel[c].type) {
- case UTIL_FORMAT_TYPE_UNSIGNED:
- return bit8 ? nir_type_uint8 :
- large ? nir_type_uint32 : nir_type_uint16;
- case UTIL_FORMAT_TYPE_SIGNED:
- return bit8 ? nir_type_int8 :
- large ? nir_type_int32 : nir_type_int16;
- case UTIL_FORMAT_TYPE_FLOAT:
- return large ? nir_type_float32 : nir_type_float16;
- default:
- unreachable("Format not renderable");
- }
+ int c = util_format_get_first_non_void_channel(desc->format);
+
+ if (c == -1)
+ unreachable("Void format not renderable");
+
+ bool large = (desc->channel[c].size > 16);
+ bool large_norm = (desc->channel[c].size > 8);
+ bool bit8 = (desc->channel[c].size == 8);
+ assert(desc->channel[c].size <= 32);
+
+ if (desc->channel[c].normalized)
+ return large_norm ? nir_type_float32 : nir_type_float16;
+
+ switch (desc->channel[c].type) {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ return bit8 ? nir_type_uint8 : large ? nir_type_uint32 : nir_type_uint16;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ return bit8 ? nir_type_int8 : large ? nir_type_int32 : nir_type_int16;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ return large ? nir_type_float32 : nir_type_float16;
+ default:
+ unreachable("Format not renderable");
+ }
}
-static enum pan_format_class
-pan_format_class_load(const struct util_format_description *desc, unsigned quirks)
+static bool
+pan_is_format_native(const struct util_format_description *desc,
+ bool broken_ld_special, bool is_store)
{
- /* Pure integers can be loaded via EXT_framebuffer_fetch and should be
- * handled as a raw load with a size conversion (it's cheap). Likewise,
- * since float framebuffers are internally implemented as raw (i.e.
- * integer) framebuffers with blend shaders to go back and forth, they
- * should be s/w as well */
-
- if (util_format_is_pure_integer(desc->format) || util_format_is_float(desc->format))
- return PAN_FORMAT_SOFTWARE;
-
- /* Check if we can do anything better than software architecturally */
- if (quirks & MIDGARD_NO_TYPED_BLEND_LOADS) {
- return (quirks & NO_BLEND_PACKS)
- ? PAN_FORMAT_SOFTWARE : PAN_FORMAT_PACK;
- }
-
- /* Some formats are missing as typed on some GPUs but have unpacks */
- if (quirks & MIDGARD_MISSING_LOADS) {
- switch (desc->format) {
- case PIPE_FORMAT_R11G11B10_FLOAT:
- return PAN_FORMAT_PACK;
- default:
- return PAN_FORMAT_NATIVE;
- }
- }
-
- /* Otherwise, we can do native */
- return PAN_FORMAT_NATIVE;
-}
+ if (is_store || broken_ld_special)
+ return false;
-static enum pan_format_class
-pan_format_class_store(const struct util_format_description *desc, unsigned quirks)
-{
- /* Check if we can do anything better than software architecturally */
- if (quirks & MIDGARD_NO_TYPED_BLEND_STORES) {
- return (quirks & NO_BLEND_PACKS)
- ? PAN_FORMAT_SOFTWARE : PAN_FORMAT_PACK;
- }
+ if (util_format_is_pure_integer(desc->format) ||
+ util_format_is_float(desc->format))
+ return false;
- return PAN_FORMAT_NATIVE;
-}
+ /* Some formats are missing as typed but have unpacks */
+ if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
+ return false;
-/* Convenience method */
+ if (desc->is_array) {
+ int c = util_format_get_first_non_void_channel(desc->format);
+ assert(c >= 0);
+ if (desc->channel[c].size > 8)
+ return false;
+ }
-static enum pan_format_class
-pan_format_class(const struct util_format_description *desc, unsigned quirks, bool is_store)
-{
- if (is_store)
- return pan_format_class_store(desc, quirks);
- else
- return pan_format_class_load(desc, quirks);
+ return true;
}
/* Software packs/unpacks, by format class. Packs take in the pixel value typed
@@ -147,430 +114,523 @@ pan_format_class(const struct util_format_description *desc, unsigned quirks, bo
* suitable for storing (with components replicated to fill). Unpacks do the
* reverse but cannot rely on replication. */
-static nir_ssa_def *
-pan_replicate(nir_builder *b, nir_ssa_def *v, unsigned num_components)
+static nir_def *
+pan_replicate(nir_builder *b, nir_def *v, unsigned num_components)
{
- nir_ssa_def *replicated[4];
-
- for (unsigned i = 0; i < 4; ++i)
- replicated[i] = nir_channel(b, v, i % num_components);
+ nir_def *replicated[4];
- return nir_vec(b, replicated, 4);
-}
+ for (unsigned i = 0; i < 4; ++i)
+ replicated[i] = nir_channel(b, v, i % num_components);
-static nir_ssa_def *
-pan_unpack_pure_32(nir_builder *b, nir_ssa_def *pack, unsigned num_components)
-{
- return nir_channels(b, pack, (1 << num_components) - 1);
+ return nir_vec(b, replicated, 4);
}
/* Pure x16 formats are x16 unpacked, so it's similar, but we need to pack
* upper/lower halves of course */
-static nir_ssa_def *
-pan_pack_pure_16(nir_builder *b, nir_ssa_def *v, unsigned num_components)
+static nir_def *
+pan_pack_pure_16(nir_builder *b, nir_def *v, unsigned num_components)
{
- nir_ssa_def *v4 = pan_replicate(b, v, num_components);
+ nir_def *v4 = pan_replicate(b, v, num_components);
- nir_ssa_def *lo = nir_pack_32_2x16(b, nir_channels(b, v4, 0x3 << 0));
- nir_ssa_def *hi = nir_pack_32_2x16(b, nir_channels(b, v4, 0x3 << 2));
+ nir_def *lo = nir_pack_32_2x16(b, nir_channels(b, v4, 0x3 << 0));
+ nir_def *hi = nir_pack_32_2x16(b, nir_channels(b, v4, 0x3 << 2));
- return nir_vec4(b, lo, hi, lo, hi);
+ return nir_vec4(b, lo, hi, lo, hi);
}
-static nir_ssa_def *
-pan_unpack_pure_16(nir_builder *b, nir_ssa_def *pack, unsigned num_components)
+static nir_def *
+pan_unpack_pure_16(nir_builder *b, nir_def *pack, unsigned num_components)
{
- nir_ssa_def *unpacked[4];
+ nir_def *unpacked[4];
+
+ assert(num_components <= 4);
+
+ for (unsigned i = 0; i < num_components; i += 2) {
+ nir_def *halves = nir_unpack_32_2x16(b, nir_channel(b, pack, i >> 1));
- assert(num_components <= 4);
+ unpacked[i + 0] = nir_channel(b, halves, 0);
+ unpacked[i + 1] = nir_channel(b, halves, 1);
+ }
- for (unsigned i = 0; i < num_components; i += 2) {
- nir_ssa_def *halves =
- nir_unpack_32_2x16(b, nir_channel(b, pack, i >> 1));
+ return nir_pad_vec4(b, nir_vec(b, unpacked, num_components));
+}
+
+static nir_def *
+pan_pack_reorder(nir_builder *b, const struct util_format_description *desc,
+ nir_def *v)
+{
+ unsigned swizzle[4] = {0, 1, 2, 3};
- unpacked[i + 0] = nir_channel(b, halves, 0);
- unpacked[i + 1] = nir_channel(b, halves, 1);
- }
+ for (unsigned i = 0; i < v->num_components; i++) {
+ if (desc->swizzle[i] <= PIPE_SWIZZLE_W)
+ swizzle[i] = desc->swizzle[i];
+ }
- return nir_pad_vec4(b, nir_vec(b, unpacked, num_components));
+ return nir_swizzle(b, v, swizzle, v->num_components);
}
-static nir_ssa_def *
-pan_replicate_4(nir_builder *b, nir_ssa_def *v)
+static nir_def *
+pan_unpack_reorder(nir_builder *b, const struct util_format_description *desc,
+ nir_def *v)
{
- return nir_vec4(b, v, v, v, v);
+ unsigned swizzle[4] = {0, 1, 2, 3};
+
+ for (unsigned i = 0; i < v->num_components; i++) {
+ if (desc->swizzle[i] <= PIPE_SWIZZLE_W)
+ swizzle[desc->swizzle[i]] = i;
+ }
+
+ return nir_swizzle(b, v, swizzle, v->num_components);
}
-static nir_ssa_def *
-pan_pack_pure_8(nir_builder *b, nir_ssa_def *v, unsigned num_components)
+static nir_def *
+pan_pack_pure_8(nir_builder *b, nir_def *v, unsigned num_components)
{
- return pan_replicate_4(b, nir_pack_32_4x8(b, pan_replicate(b, v, num_components)));
+ return nir_replicate(
+ b, nir_pack_32_4x8(b, pan_replicate(b, v, num_components)), 4);
}
-static nir_ssa_def *
-pan_unpack_pure_8(nir_builder *b, nir_ssa_def *pack, unsigned num_components)
+static nir_def *
+pan_unpack_pure_8(nir_builder *b, nir_def *pack, unsigned num_components)
{
- nir_ssa_def *unpacked = nir_unpack_32_4x8(b, nir_channel(b, pack, 0));
- return nir_channels(b, unpacked, (1 << num_components) - 1);
+ nir_def *unpacked = nir_unpack_32_4x8(b, nir_channel(b, pack, 0));
+ return nir_trim_vector(b, unpacked, num_components);
}
-/* For <= 8-bits per channel, UNORM formats are packed like UNORM 8, with
- * zeroes spacing out each component as needed */
+static nir_def *
+pan_fsat(nir_builder *b, nir_def *v, bool is_signed)
+{
+ if (is_signed)
+ return nir_fsat_signed_mali(b, v);
+ else
+ return nir_fsat(b, v);
+}
-static nir_ssa_def *
-pan_pack_unorm(nir_builder *b, nir_ssa_def *v,
- unsigned x, unsigned y, unsigned z, unsigned w)
+static float
+norm_scale(bool snorm, unsigned bits)
{
- /* If a channel has N bits, 1.0 is encoded as 2^N - 1 */
- nir_ssa_def *scales = nir_imm_vec4_16(b,
- (1 << x) - 1, (1 << y) - 1,
- (1 << z) - 1, (1 << w) - 1);
+ if (snorm)
+ return (1 << (bits - 1)) - 1;
+ else
+ return (1 << bits) - 1;
+}
- /* If a channel has N bits, we pad out to the byte by (8 - N) bits */
- nir_ssa_def *shifts = nir_imm_ivec4(b, 8 - x, 8 - y, 8 - z, 8 - w);
+/* For <= 8-bits per channel, [U,S]NORM formats are packed like [U,S]NORM 8,
+ * with zeroes spacing out each component as needed */
- nir_ssa_def *f = nir_fmul(b, nir_fsat(b, nir_pad_vec4(b, v)), scales);
- nir_ssa_def *u8 = nir_f2u8(b, nir_fround_even(b, f));
- nir_ssa_def *s = nir_ishl(b, u8, shifts);
- nir_ssa_def *repl = nir_pack_32_4x8(b, s);
+static nir_def *
+pan_pack_norm(nir_builder *b, nir_def *v, unsigned x, unsigned y, unsigned z,
+ unsigned w, bool is_signed)
+{
+ /* If a channel has N bits, 1.0 is encoded as 2^N - 1 for UNORMs and
+ * 2^(N-1) - 1 for SNORMs */
+ nir_def *scales =
+ is_signed ? nir_imm_vec4_16(b, (1 << (x - 1)) - 1, (1 << (y - 1)) - 1,
+ (1 << (z - 1)) - 1, (1 << (w - 1)) - 1)
+ : nir_imm_vec4_16(b, (1 << x) - 1, (1 << y) - 1, (1 << z) - 1,
+ (1 << w) - 1);
+
+ /* If a channel has N bits, we pad out to the byte by (8 - N) bits */
+ nir_def *shifts = nir_imm_ivec4(b, 8 - x, 8 - y, 8 - z, 8 - w);
+ nir_def *clamped = pan_fsat(b, nir_pad_vec4(b, v), is_signed);
+
+ nir_def *f = nir_fmul(b, clamped, scales);
+ nir_def *u8 = nir_f2u8(b, nir_fround_even(b, f));
+ nir_def *s = nir_ishl(b, u8, shifts);
+ nir_def *repl = nir_pack_32_4x8(b, s);
+
+ return nir_replicate(b, repl, 4);
+}
- return pan_replicate_4(b, repl);
+static nir_def *
+pan_pack_unorm(nir_builder *b, nir_def *v, unsigned x, unsigned y, unsigned z,
+ unsigned w)
+{
+ return pan_pack_norm(b, v, x, y, z, w, false);
}
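
As a quick sanity check on the scale factors used in pan_pack_norm above (illustrative arithmetic only):

    /* UNORM, N bits:  1.0 encodes as (1 << N) - 1
     *   e.g. 4-bit UNORM:  1.0 -> 15,  0.5 -> round_even(7.5) -> 8
     * SNORM, N bits:  1.0 encodes as (1 << (N - 1)) - 1
     *   e.g. 8-bit SNORM:  1.0 -> 127,  -1.0 -> -127 (after the signed clamp) */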
/* RGB10_A2 is packed in the tilebuffer as the bottom 3 bytes being the top
* 8-bits of RGB and the top byte being RGBA as 2-bits packed. As imirkin
* pointed out, this means free conversion to RGBX8 */
-static nir_ssa_def *
-pan_pack_unorm_1010102(nir_builder *b, nir_ssa_def *v)
+static nir_def *
+pan_pack_unorm_1010102(nir_builder *b, nir_def *v)
{
- nir_ssa_def *scale = nir_imm_vec4(b, 1023.0, 1023.0, 1023.0, 3.0);
- nir_ssa_def *s = nir_f2u32(b, nir_fround_even(b, nir_fmul(b, nir_fsat(b, v), scale)));
+ nir_def *scale = nir_imm_vec4(b, 1023.0, 1023.0, 1023.0, 3.0);
+ nir_def *s =
+ nir_f2u32(b, nir_fround_even(b, nir_fmul(b, nir_fsat(b, v), scale)));
- nir_ssa_def *top8 = nir_ushr(b, s, nir_imm_ivec4(b, 0x2, 0x2, 0x2, 0x2));
- nir_ssa_def *top8_rgb = nir_pack_32_4x8(b, nir_u2u8(b, top8));
+ nir_def *top8 = nir_ushr(b, s, nir_imm_ivec4(b, 0x2, 0x2, 0x2, 0x2));
+ nir_def *top8_rgb = nir_pack_32_4x8(b, nir_u2u8(b, top8));
- nir_ssa_def *bottom2 = nir_iand(b, s, nir_imm_ivec4(b, 0x3, 0x3, 0x3, 0x3));
+ nir_def *bottom2 = nir_iand(b, s, nir_imm_ivec4(b, 0x3, 0x3, 0x3, 0x3));
- nir_ssa_def *top =
- nir_ior(b,
- nir_ior(b,
- nir_ishl(b, nir_channel(b, bottom2, 0), nir_imm_int(b, 24 + 0)),
- nir_ishl(b, nir_channel(b, bottom2, 1), nir_imm_int(b, 24 + 2))),
- nir_ior(b,
- nir_ishl(b, nir_channel(b, bottom2, 2), nir_imm_int(b, 24 + 4)),
- nir_ishl(b, nir_channel(b, bottom2, 3), nir_imm_int(b, 24 + 6))));
+ nir_def *top =
+ nir_ior(b,
+ nir_ior(b, nir_ishl_imm(b, nir_channel(b, bottom2, 0), 24 + 0),
+ nir_ishl_imm(b, nir_channel(b, bottom2, 1), 24 + 2)),
+ nir_ior(b, nir_ishl_imm(b, nir_channel(b, bottom2, 2), 24 + 4),
+ nir_ishl_imm(b, nir_channel(b, bottom2, 3), 24 + 6)));
- nir_ssa_def *p = nir_ior(b, top, top8_rgb);
- return pan_replicate_4(b, p);
+ nir_def *p = nir_ior(b, top, top8_rgb);
+ return nir_replicate(b, p, 4);
}
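
A worked example of the layout described above (illustrative, UNORM path):

    /* v = (1.0, 0.0, 0.0, 1.0)  ->  s = (1023, 0, 0, 3)
     * top 8 bits of R/G/B go to bytes 0-2:            0x000000ff
     * low 2 bits of each channel go to the top byte
     * (R: bits 24-25, G: 26-27, B: 28-29, A: 30-31):  (3 << 24) | (3 << 30)
     * packed word: 0xc30000ff  -- reads back as RGBX8 for free */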
/* On the other hand, the pure int RGB10_A2 is identical to the spec */
-static nir_ssa_def *
-pan_pack_uint_1010102(nir_builder *b, nir_ssa_def *v)
+static nir_def *
+pan_pack_int_1010102(nir_builder *b, nir_def *v, bool is_signed)
{
- nir_ssa_def *shift = nir_ishl(b, nir_u2u32(b, v),
- nir_imm_ivec4(b, 0, 10, 20, 30));
+ v = nir_u2u32(b, v);
+
+ /* Clamp the values */
+ if (is_signed) {
+ v = nir_imin(b, v, nir_imm_ivec4(b, 511, 511, 511, 1));
+ v = nir_imax(b, v, nir_imm_ivec4(b, -512, -512, -512, -2));
+ } else {
+ v = nir_umin(b, v, nir_imm_ivec4(b, 1023, 1023, 1023, 3));
+ }
- nir_ssa_def *p = nir_ior(b,
- nir_ior(b, nir_channel(b, shift, 0), nir_channel(b, shift, 1)),
- nir_ior(b, nir_channel(b, shift, 2), nir_channel(b, shift, 3)));
+ v = nir_ishl(b, v, nir_imm_ivec4(b, 0, 10, 20, 30));
+ v = nir_ior(b, nir_ior(b, nir_channel(b, v, 0), nir_channel(b, v, 1)),
+ nir_ior(b, nir_channel(b, v, 2), nir_channel(b, v, 3)));
- return pan_replicate_4(b, p);
+ return nir_replicate(b, v, 4);
}
-static nir_ssa_def *
-pan_unpack_uint_1010102(nir_builder *b, nir_ssa_def *packed)
+static nir_def *
+pan_unpack_int_1010102(nir_builder *b, nir_def *packed, bool is_signed)
{
- nir_ssa_def *chan = nir_channel(b, packed, 0);
+ nir_def *v = nir_replicate(b, nir_channel(b, packed, 0), 4);
- nir_ssa_def *shift = nir_ushr(b, pan_replicate_4(b, chan),
- nir_imm_ivec4(b, 0, 10, 20, 30));
+ /* Left shift all components so the sign bit is on the MSB, and
+ * can be extended by ishr(). The ishl()+[u,i]shr() combination
+ * sets all unused bits to 0 without requiring a mask.
+ */
+ v = nir_ishl(b, v, nir_imm_ivec4(b, 22, 12, 2, 0));
- nir_ssa_def *mask = nir_iand(b, shift,
- nir_imm_ivec4(b, 0x3ff, 0x3ff, 0x3ff, 0x3));
+ if (is_signed)
+ v = nir_ishr(b, v, nir_imm_ivec4(b, 22, 22, 22, 30));
+ else
+ v = nir_ushr(b, v, nir_imm_ivec4(b, 22, 22, 22, 30));
- return nir_i2i16(b, mask);
+ return nir_i2i16(b, v);
}
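
The shift trick above, worked for the red channel of an RGB10_A2 SINT pixel (illustrative):

    /* R occupies bits 0-9 of the packed word.
     * ishl by 22 puts R's sign bit (bit 9) at bit 31 and drops the other
     * channels; ishr by 22 then sign-extends back down.
     * e.g. R = 0x3ff (-1 as a signed 10-bit value):
     *   0x3ff << 22            = 0xffc00000
     *   (int)0xffc00000 >> 22  = 0xffffffff = -1 */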
/* NIR means we can *finally* catch a break */
-static nir_ssa_def *
-pan_pack_r11g11b10(nir_builder *b, nir_ssa_def *v)
+static nir_def *
+pan_pack_r11g11b10(nir_builder *b, nir_def *v)
{
- return pan_replicate_4(b, nir_format_pack_11f11f10f(b,
- nir_f2f32(b, v)));
+ return nir_replicate(b, nir_format_pack_11f11f10f(b, nir_f2f32(b, v)), 4);
}
-static nir_ssa_def *
-pan_unpack_r11g11b10(nir_builder *b, nir_ssa_def *v)
+static nir_def *
+pan_unpack_r11g11b10(nir_builder *b, nir_def *v)
{
- nir_ssa_def *f32 = nir_format_unpack_11f11f10f(b, nir_channel(b, v, 0));
- nir_ssa_def *f16 = nir_f2fmp(b, f32);
-
- /* Extend to vec4 with alpha */
- nir_ssa_def *components[4] = {
- nir_channel(b, f16, 0),
- nir_channel(b, f16, 1),
- nir_channel(b, f16, 2),
- nir_imm_float16(b, 1.0)
- };
-
- return nir_vec(b, components, 4);
+ nir_def *f32 = nir_format_unpack_11f11f10f(b, nir_channel(b, v, 0));
+ nir_def *f16 = nir_f2fmp(b, f32);
+
+ /* Extend to vec4 with alpha */
+ nir_def *components[4] = {nir_channel(b, f16, 0), nir_channel(b, f16, 1),
+ nir_channel(b, f16, 2), nir_imm_float16(b, 1.0)};
+
+ return nir_vec(b, components, 4);
}
/* Wrapper around sRGB conversion */
-static nir_ssa_def *
-pan_linear_to_srgb(nir_builder *b, nir_ssa_def *linear)
+static nir_def *
+pan_linear_to_srgb(nir_builder *b, nir_def *linear)
{
- nir_ssa_def *rgb = nir_channels(b, linear, 0x7);
+ nir_def *rgb = nir_trim_vector(b, linear, 3);
- /* TODO: fp16 native conversion */
- nir_ssa_def *srgb = nir_f2fmp(b,
- nir_format_linear_to_srgb(b, nir_f2f32(b, rgb)));
+ /* TODO: fp16 native conversion */
+ nir_def *srgb =
+ nir_f2fmp(b, nir_format_linear_to_srgb(b, nir_f2f32(b, rgb)));
- nir_ssa_def *comp[4] = {
- nir_channel(b, srgb, 0),
- nir_channel(b, srgb, 1),
- nir_channel(b, srgb, 2),
- nir_channel(b, linear, 3),
- };
+ nir_def *comp[4] = {
+ nir_channel(b, srgb, 0),
+ nir_channel(b, srgb, 1),
+ nir_channel(b, srgb, 2),
+ nir_channel(b, linear, 3),
+ };
- return nir_vec(b, comp, 4);
+ return nir_vec(b, comp, 4);
+}
+
+static nir_def *
+pan_unpack_pure(nir_builder *b, nir_def *packed, unsigned size, unsigned nr)
+{
+ switch (size) {
+ case 32:
+ return nir_trim_vector(b, packed, nr);
+ case 16:
+ return pan_unpack_pure_16(b, packed, nr);
+ case 8:
+ return pan_unpack_pure_8(b, packed, nr);
+ default:
+ unreachable("Unrenderable size");
+ }
}
/* Generic dispatches for un/pack regardless of format */
-static nir_ssa_def *
-pan_unpack(nir_builder *b,
- const struct util_format_description *desc,
- nir_ssa_def *packed)
+static nir_def *
+pan_unpack(nir_builder *b, const struct util_format_description *desc,
+ nir_def *packed)
{
- if (desc->is_array) {
- int c = util_format_get_first_non_void_channel(desc->format);
- assert(c >= 0);
- struct util_format_channel_description d = desc->channel[c];
-
- if (d.size == 32 || d.size == 16) {
- assert(!d.normalized);
- assert(d.type == UTIL_FORMAT_TYPE_FLOAT || d.pure_integer);
-
- return d.size == 32 ? pan_unpack_pure_32(b, packed, desc->nr_channels) :
- pan_unpack_pure_16(b, packed, desc->nr_channels);
- } else if (d.size == 8) {
- assert(d.pure_integer);
- return pan_unpack_pure_8(b, packed, desc->nr_channels);
- } else {
- unreachable("Unrenderable size");
- }
- }
-
- switch (desc->format) {
- case PIPE_FORMAT_R10G10B10A2_UINT:
- return pan_unpack_uint_1010102(b, packed);
- case PIPE_FORMAT_R11G11B10_FLOAT:
- return pan_unpack_r11g11b10(b, packed);
- default:
- break;
- }
-
- fprintf(stderr, "%s\n", desc->name);
- unreachable("Unknown format");
+ if (desc->is_array) {
+ int c = util_format_get_first_non_void_channel(desc->format);
+ assert(c >= 0);
+ struct util_format_channel_description d = desc->channel[c];
+ nir_def *unpacked = pan_unpack_pure(b, packed, d.size, desc->nr_channels);
+
+ /* Normalized formats are unpacked as integers. We need to
+ * convert to float for the final result.
+ */
+ if (d.normalized) {
+ bool snorm = desc->is_snorm;
+ unsigned float_sz = (d.size <= 8 ? 16 : 32);
+ float multiplier = norm_scale(snorm, d.size);
+
+ nir_def *as_float = snorm ? nir_i2fN(b, unpacked, float_sz)
+ : nir_u2fN(b, unpacked, float_sz);
+
+ return nir_fmul_imm(b, as_float, 1.0 / multiplier);
+ } else {
+ return unpacked;
+ }
+ }
+
+ switch (desc->format) {
+ case PIPE_FORMAT_R10G10B10A2_UINT:
+ case PIPE_FORMAT_B10G10R10A2_UINT:
+ return pan_unpack_int_1010102(b, packed, false);
+ case PIPE_FORMAT_R10G10B10A2_SINT:
+ case PIPE_FORMAT_B10G10R10A2_SINT:
+ return pan_unpack_int_1010102(b, packed, true);
+ case PIPE_FORMAT_R11G11B10_FLOAT:
+ return pan_unpack_r11g11b10(b, packed);
+ default:
+ break;
+ }
+
+ fprintf(stderr, "%s\n", desc->name);
+ unreachable("Unknown format");
}
-static nir_ssa_def *
-pan_pack(nir_builder *b,
- const struct util_format_description *desc,
- nir_ssa_def *unpacked)
+static nir_def *pan_pack(nir_builder *b,
+ const struct util_format_description *desc,
+                         nir_def *unpacked)
{
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
- unpacked = pan_linear_to_srgb(b, unpacked);
-
- if (util_format_is_unorm8(desc))
- return pan_pack_unorm(b, unpacked, 8, 8, 8, 8);
-
- if (desc->is_array) {
- int c = util_format_get_first_non_void_channel(desc->format);
- assert(c >= 0);
- struct util_format_channel_description d = desc->channel[c];
-
- if (d.size == 32 || d.size == 16) {
- assert(!d.normalized);
- assert(d.type == UTIL_FORMAT_TYPE_FLOAT || d.pure_integer);
-
- return d.size == 32 ?
- pan_replicate(b, unpacked, desc->nr_channels) :
- pan_pack_pure_16(b, unpacked, desc->nr_channels);
- } else if (d.size == 8) {
- assert(d.pure_integer);
- return pan_pack_pure_8(b, unpacked, desc->nr_channels);
- } else {
- unreachable("Unrenderable size");
- }
- }
-
- switch (desc->format) {
- case PIPE_FORMAT_B4G4R4A4_UNORM:
- case PIPE_FORMAT_B4G4R4X4_UNORM:
- case PIPE_FORMAT_A4R4_UNORM:
- case PIPE_FORMAT_R4A4_UNORM:
- case PIPE_FORMAT_A4B4G4R4_UNORM:
- case PIPE_FORMAT_R4G4B4A4_UNORM:
- return pan_pack_unorm(b, unpacked, 4, 4, 4, 4);
- case PIPE_FORMAT_B5G5R5A1_UNORM:
- case PIPE_FORMAT_R5G5B5A1_UNORM:
- return pan_pack_unorm(b, unpacked, 5, 6, 5, 1);
- case PIPE_FORMAT_R5G6B5_UNORM:
- case PIPE_FORMAT_B5G6R5_UNORM:
- return pan_pack_unorm(b, unpacked, 5, 6, 5, 0);
- case PIPE_FORMAT_R10G10B10A2_UNORM:
- case PIPE_FORMAT_B10G10R10A2_UNORM:
- return pan_pack_unorm_1010102(b, unpacked);
- case PIPE_FORMAT_R10G10B10A2_UINT:
- return pan_pack_uint_1010102(b, unpacked);
- case PIPE_FORMAT_R11G11B10_FLOAT:
- return pan_pack_r11g11b10(b, unpacked);
- default:
- break;
- }
-
- fprintf(stderr, "%s\n", desc->name);
- unreachable("Unknown format");
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ unpacked = pan_linear_to_srgb(b, unpacked);
+
+ if (desc->is_array) {
+ int c = util_format_get_first_non_void_channel(desc->format);
+ assert(c >= 0);
+ struct util_format_channel_description d = desc->channel[c];
+
+ /* Pure formats are packed as-is */
+ nir_def *raw = unpacked;
+
+ /* Normalized formats get normalized first */
+ if (d.normalized) {
+ bool snorm = desc->is_snorm;
+ float multiplier = norm_scale(snorm, d.size);
+ nir_def *clamped = pan_fsat(b, unpacked, snorm);
+ nir_def *normed = nir_fmul_imm(b, clamped, multiplier);
+
+ raw = nir_f2uN(b, normed, d.size);
+ }
+
+ /* Pack the raw format */
+ switch (d.size) {
+ case 32:
+ return pan_replicate(b, raw, desc->nr_channels);
+ case 16:
+ return pan_pack_pure_16(b, raw, desc->nr_channels);
+ case 8:
+ return pan_pack_pure_8(b, raw, desc->nr_channels);
+ default:
+ unreachable("Unrenderable size");
+ }
+ }
+
+ switch (desc->format) {
+ case PIPE_FORMAT_B4G4R4A4_UNORM:
+ case PIPE_FORMAT_B4G4R4X4_UNORM:
+ case PIPE_FORMAT_A4R4_UNORM:
+ case PIPE_FORMAT_R4A4_UNORM:
+ case PIPE_FORMAT_A4B4G4R4_UNORM:
+ case PIPE_FORMAT_R4G4B4A4_UNORM:
+ return pan_pack_unorm(b, unpacked, 4, 4, 4, 4);
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ case PIPE_FORMAT_R5G5B5A1_UNORM:
+ return pan_pack_unorm(b, unpacked, 5, 6, 5, 1);
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ return pan_pack_unorm(b, unpacked, 5, 6, 5, 0);
+ case PIPE_FORMAT_R10G10B10A2_UNORM:
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ return pan_pack_unorm_1010102(b, unpacked);
+ case PIPE_FORMAT_R10G10B10A2_UINT:
+ case PIPE_FORMAT_B10G10R10A2_UINT:
+ return pan_pack_int_1010102(b, unpacked, false);
+ case PIPE_FORMAT_R10G10B10A2_SINT:
+ case PIPE_FORMAT_B10G10R10A2_SINT:
+ return pan_pack_int_1010102(b, unpacked, true);
+ case PIPE_FORMAT_R11G11B10_FLOAT:
+ return pan_pack_r11g11b10(b, unpacked);
+ default:
+ break;
+ }
+
+ fprintf(stderr, "%s\n", desc->name);
+ unreachable("Unknown format");
}
static void
-pan_lower_fb_store(nir_shader *shader,
- nir_builder *b,
- nir_intrinsic_instr *intr,
- const struct util_format_description *desc,
- unsigned quirks)
+pan_lower_fb_store(nir_builder *b, nir_intrinsic_instr *intr,
+ const struct util_format_description *desc,
+ bool reorder_comps, unsigned nr_samples)
{
- /* For stores, add conversion before */
- nir_ssa_def *unpacked = nir_ssa_for_src(b, intr->src[1], 4);
- nir_ssa_def *packed = pan_pack(b, desc, unpacked);
+ /* For stores, add conversion before */
+ nir_def *unpacked = intr->src[0].ssa;
+ unpacked = nir_pad_vec4(b, unpacked);
- nir_store_raw_output_pan(b, packed);
+ /* Re-order the components */
+ if (reorder_comps)
+ unpacked = pan_pack_reorder(b, desc, unpacked);
+
+ nir_def *packed = pan_pack(b, desc, unpacked);
+
+ /* We have to split writeout in 128 bit chunks */
+ unsigned iterations = DIV_ROUND_UP(desc->block.bits * nr_samples, 128);
+
+ for (unsigned s = 0; s < iterations; ++s) {
+ nir_store_raw_output_pan(b, packed,
+ .io_semantics = nir_intrinsic_io_semantics(intr),
+ .base = s);
+ }
}
-static nir_ssa_def *
+static nir_def *
pan_sample_id(nir_builder *b, int sample)
{
- return (sample >= 0) ? nir_imm_int(b, sample) : nir_load_sample_id(b);
+ return (sample >= 0) ? nir_imm_int(b, sample) : nir_load_sample_id(b);
}
static void
-pan_lower_fb_load(nir_shader *shader,
- nir_builder *b,
- nir_intrinsic_instr *intr,
- const struct util_format_description *desc,
- unsigned base, int sample, unsigned quirks)
+pan_lower_fb_load(nir_builder *b, nir_intrinsic_instr *intr,
+ const struct util_format_description *desc,
+ bool reorder_comps, int sample)
+{
+ nir_def *packed =
+ nir_load_raw_output_pan(b, 4, 32, pan_sample_id(b, sample),
+ .io_semantics = nir_intrinsic_io_semantics(intr));
+
+ /* Convert the raw value */
+ nir_def *unpacked = pan_unpack(b, desc, packed);
+
+ /* Convert to the size of the load intrinsic.
+ *
+ * We can assume that the type will match with the framebuffer format:
+ *
+ * Page 170 of the PDF of the OpenGL ES 3.0.6 spec says:
+ *
+ * If [UNORM or SNORM, convert to fixed-point]; otherwise no type
+ * conversion is applied. If the values written by the fragment shader
+ * do not match the format(s) of the corresponding color buffer(s),
+ * the result is undefined.
+ */
+
+ unsigned bits = intr->def.bit_size;
+
+ nir_alu_type src_type =
+ nir_alu_type_get_base_type(pan_unpacked_type_for_format(desc));
+
+ unpacked = nir_convert_to_bit_size(b, unpacked, src_type, bits);
+ unpacked = nir_resize_vector(b, unpacked, intr->def.num_components);
+
+ /* Reorder the components */
+ if (reorder_comps)
+ unpacked = pan_unpack_reorder(b, desc, unpacked);
+
+ nir_def_rewrite_uses_after(&intr->def, unpacked, &intr->instr);
+}
+
+struct inputs {
+ const enum pipe_format *rt_fmts;
+ uint8_t raw_fmt_mask;
+ bool is_blend;
+ bool broken_ld_special;
+ unsigned nr_samples;
+};
+
+static bool
+lower(nir_builder *b, nir_instr *instr, void *data)
{
- nir_ssa_def *packed =
- nir_load_raw_output_pan(b, 4, 32, pan_sample_id(b, sample),
- .base = base);
-
- /* Convert the raw value */
- nir_ssa_def *unpacked = pan_unpack(b, desc, packed);
-
- /* Convert to the size of the load intrinsic.
- *
- * We can assume that the type will match with the framebuffer format:
- *
- * Page 170 of the PDF of the OpenGL ES 3.0.6 spec says:
- *
- * If [UNORM or SNORM, convert to fixed-point]; otherwise no type
- * conversion is applied. If the values written by the fragment shader
- * do not match the format(s) of the corresponding color buffer(s),
- * the result is undefined.
- */
-
- unsigned bits = nir_dest_bit_size(intr->dest);
-
- nir_alu_type src_type = nir_alu_type_get_base_type(
- pan_unpacked_type_for_format(desc));
-
- unpacked = nir_convert_to_bit_size(b, unpacked, src_type, bits);
- unpacked = nir_pad_vector(b, unpacked, nir_dest_num_components(intr->dest));
-
- nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, unpacked, &intr->instr);
+ struct inputs *inputs = data;
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ bool is_load = intr->intrinsic == nir_intrinsic_load_output;
+ bool is_store = intr->intrinsic == nir_intrinsic_store_output;
+
+ if (!(is_load || (is_store && inputs->is_blend)))
+ return false;
+
+ nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+ if (sem.location < FRAG_RESULT_DATA0)
+ return false;
+
+ unsigned rt = sem.location - FRAG_RESULT_DATA0;
+ if (inputs->rt_fmts[rt] == PIPE_FORMAT_NONE)
+ return false;
+
+ const struct util_format_description *desc =
+ util_format_description(inputs->rt_fmts[rt]);
+
+ /* Don't lower */
+ if (pan_is_format_native(desc, inputs->broken_ld_special, is_store))
+ return false;
+
+ /* EXT_shader_framebuffer_fetch requires per-sample loads. MSAA blend
+ * shaders are not yet handled, so for now always load sample 0.
+ */
+ int sample = inputs->is_blend ? 0 : -1;
+ bool reorder_comps = inputs->raw_fmt_mask & BITFIELD_BIT(rt);
+
+ if (is_store) {
+ b->cursor = nir_before_instr(instr);
+ pan_lower_fb_store(b, intr, desc, reorder_comps, inputs->nr_samples);
+ } else {
+ b->cursor = nir_after_instr(instr);
+ pan_lower_fb_load(b, intr, desc, reorder_comps, sample);
+ }
+
+ nir_instr_remove(instr);
+ return true;
}
bool
pan_lower_framebuffer(nir_shader *shader, const enum pipe_format *rt_fmts,
- bool is_blend, unsigned quirks)
+ uint8_t raw_fmt_mask, unsigned blend_shader_nr_samples,
+ bool broken_ld_special)
{
- if (shader->info.stage != MESA_SHADER_FRAGMENT)
- return false;
-
- bool progress = false;
-
- nir_foreach_function(func, shader) {
- nir_foreach_block(block, func->impl) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-
- bool is_load = intr->intrinsic == nir_intrinsic_load_deref;
- bool is_store = intr->intrinsic == nir_intrinsic_store_deref;
-
- if (!(is_load || (is_store && is_blend)))
- continue;
-
- nir_variable *var = nir_intrinsic_get_var(intr, 0);
-
- if (var->data.mode != nir_var_shader_out)
- continue;
-
- if (var->data.location < FRAG_RESULT_DATA0)
- continue;
-
- unsigned base = var->data.driver_location;
- unsigned rt = var->data.location - FRAG_RESULT_DATA0;
-
- if (rt_fmts[rt] == PIPE_FORMAT_NONE)
- continue;
-
- const struct util_format_description *desc =
- util_format_description(rt_fmts[rt]);
-
- enum pan_format_class fmt_class =
- pan_format_class(desc, quirks, is_store);
-
- /* Don't lower */
- if (fmt_class == PAN_FORMAT_NATIVE)
- continue;
-
- /* EXT_shader_framebuffer_fetch requires
- * per-sample loads.
- * MSAA blend shaders are not yet handled, so
- * for now always load sample 0. */
- int sample = is_blend ? 0 : -1;
-
- nir_builder b;
- nir_builder_init(&b, func->impl);
-
- if (is_store) {
- b.cursor = nir_before_instr(instr);
- pan_lower_fb_store(shader, &b, intr, desc, quirks);
- } else {
- b.cursor = nir_after_instr(instr);
- pan_lower_fb_load(shader, &b, intr, desc, base, sample, quirks);
- }
-
- nir_instr_remove(instr);
-
- progress = true;
- }
- }
-
- nir_metadata_preserve(func->impl, nir_metadata_block_index |
- nir_metadata_dominance);
- }
-
- return progress;
+ assert(shader->info.stage == MESA_SHADER_FRAGMENT);
+
+ return nir_shader_instructions_pass(
+ shader, lower, nir_metadata_block_index | nir_metadata_dominance,
+ &(struct inputs){
+ .rt_fmts = rt_fmts,
+ .raw_fmt_mask = raw_fmt_mask,
+ .nr_samples = blend_shader_nr_samples,
+ .is_blend = blend_shader_nr_samples > 0,
+ .broken_ld_special = broken_ld_special,
+ });
}
diff --git a/src/panfrost/util/pan_lower_framebuffer.h b/src/panfrost/util/pan_lower_framebuffer.h
index 5491cd346b1..bf6509175bb 100644
--- a/src/panfrost/util/pan_lower_framebuffer.h
+++ b/src/panfrost/util/pan_lower_framebuffer.h
@@ -30,18 +30,12 @@
#include "compiler/nir/nir.h"
#include "util/format/u_format.h"
-/* NATIVE formats can use a typed load/store. PACK formats cannot but can use a
- * typed pack/unpack instruction. SOFTWARE formats are lowered */
-
-enum pan_format_class {
- PAN_FORMAT_NATIVE,
- PAN_FORMAT_PACK,
- PAN_FORMAT_SOFTWARE
-};
-
-nir_alu_type pan_unpacked_type_for_format(const struct util_format_description *desc);
+nir_alu_type
+pan_unpacked_type_for_format(const struct util_format_description *desc);
bool pan_lower_framebuffer(nir_shader *shader, const enum pipe_format *rt_fmts,
- bool is_blend, unsigned quirks);
+ uint8_t raw_fmt_mask,
+ unsigned blend_shader_nr_samples,
+ bool broken_ld_special);
#endif
diff --git a/src/panfrost/util/pan_lower_helper_invocation.c b/src/panfrost/util/pan_lower_helper_invocation.c
index 23a37a15dd3..d4d1614f0f4 100644
--- a/src/panfrost/util/pan_lower_helper_invocation.c
+++ b/src/panfrost/util/pan_lower_helper_invocation.c
@@ -21,8 +21,8 @@
* SOFTWARE.
*/
-#include "pan_ir.h"
#include "compiler/nir/nir_builder.h"
+#include "pan_ir.h"
/* Lower gl_HelperInvocation to (gl_SampleMaskIn == 0), this depends on
* architectural details but is required for correct operation with
@@ -30,29 +30,25 @@
* way to implement load_sample_id_no_per_sample. */
static bool
-pan_lower_helper_invocation_instr(nir_builder *b, nir_instr *instr, void *data)
+pan_lower_helper_invocation_instr(nir_builder *b, nir_intrinsic_instr *intr,
+ void *data)
{
- if (instr->type != nir_instr_type_intrinsic)
- return false;
-
- nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
- if (intr->intrinsic != nir_intrinsic_load_helper_invocation)
- return false;
+ if (intr->intrinsic != nir_intrinsic_load_helper_invocation)
+ return false;
- b->cursor = nir_before_instr(instr);
+ b->cursor = nir_before_instr(&intr->instr);
- nir_ssa_def *mask = nir_load_sample_mask_in(b);
- nir_ssa_def *eq = nir_ieq(b, mask, nir_imm_int(b, 0));
- nir_ssa_def_rewrite_uses(&intr->dest.ssa, eq);
+ nir_def *mask = nir_load_sample_mask_in(b);
+ nir_def *eq = nir_ieq_imm(b, mask, 0);
+ nir_def_rewrite_uses(&intr->def, eq);
- return true;
+ return true;
}
bool
pan_lower_helper_invocation(nir_shader *shader)
{
- return nir_shader_instructions_pass(shader,
- pan_lower_helper_invocation_instr,
- nir_metadata_block_index | nir_metadata_dominance,
- NULL);
+ return nir_shader_intrinsics_pass(
+ shader, pan_lower_helper_invocation_instr,
+ nir_metadata_block_index | nir_metadata_dominance, NULL);
}
diff --git a/src/panfrost/util/pan_lower_image_index.c b/src/panfrost/util/pan_lower_image_index.c
new file mode 100644
index 00000000000..4f3655af6e4
--- /dev/null
+++ b/src/panfrost/util/pan_lower_image_index.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2024 Collabora, Ltd.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "compiler/nir/nir_builder.h"
+#include "pan_ir.h"
+
+/* The vertex shader gets passed image attribute descriptors through the
+ * vertex attribute descriptor array. This forces us to apply an offset
+ * to all image accesses to get the actual attribute offset.
+ *
+ * The gallium driver emits the vertex attributes on each draw, and puts
+ * image attributes right after the vertex attributes, which implies passing
+ * vs_img_attrib_offset = util_bitcount64(nir->info.inputs_read).
+ *
+ * The Vulkan driver, on the other hand, uses
+ * VkVertexInputAttributeDescription to build a table of attributes passed
+ * to the shader. While there's no reason for the app to define more
+ * attributes than it actually uses in the vertex shader, it doesn't seem
+ * to be disallowed either. Not to mention that vkCmdSetVertexInputEXT()
+ * allows one to dynamically change the vertex input configuration, and
+ * possibly pass more attributes than referenced by the vertex shader bound to
+ * the command buffer at draw time. Of course, we could carry this information
+ * at the pipeline level, and re-emit the attribute array, but emitting only
+ * when the vertex input configuration is flagged dirty is simpler.
+ * In order for this to work, we use a fixed image attribute offset.
+ */
+static bool
+lower_image_intr(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ if (intr->intrinsic != nir_intrinsic_image_load &&
+ intr->intrinsic != nir_intrinsic_image_store)
+ return false;
+
+ unsigned img_attr_offset = *(unsigned *)data;
+ nir_def *index = intr->src[0].ssa;
+
+ b->cursor = nir_before_instr(&intr->instr);
+
+ index = nir_iadd_imm(b, index, img_attr_offset);
+ nir_src_rewrite(&intr->src[0], index);
+ return true;
+}
+
+bool
+pan_lower_image_index(nir_shader *shader, unsigned vs_img_attrib_offset)
+{
+ if (shader->info.stage != MESA_SHADER_VERTEX)
+ return false;
+
+ return nir_shader_intrinsics_pass(
+ shader, lower_image_intr,
+ nir_metadata_block_index | nir_metadata_dominance, &vs_img_attrib_offset);
+}
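
A hypothetical gallium-style call site matching the comment above (the offset expression is taken from that comment; treat this as a sketch rather than the exact driver code):

    /* images follow the vertex attributes in the attribute descriptor
     * array, so the offset is the number of vertex inputs actually read */
    pan_lower_image_index(nir, util_bitcount64(nir->info.inputs_read));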
diff --git a/src/panfrost/util/pan_lower_image_ms.c b/src/panfrost/util/pan_lower_image_ms.c
new file mode 100644
index 00000000000..0296cf839cd
--- /dev/null
+++ b/src/panfrost/util/pan_lower_image_ms.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2024 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors (Collabora):
+ * Eric Smith <eric.smith@collabora.com>
+ */
+
+#include "compiler/nir/nir.h"
+#include "compiler/nir/nir_builder.h"
+#include "pan_ir.h"
+
+static bool
+nir_lower_image_ms(nir_builder *b, nir_intrinsic_instr *intr,
+ UNUSED void *data)
+{
+ switch (intr->intrinsic) {
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_deref_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_deref_store:
+ case nir_intrinsic_image_texel_address:
+ break;
+ default:
+ return false;
+ }
+
+ if (nir_intrinsic_image_dim(intr) != GLSL_SAMPLER_DIM_MS)
+ return false;
+
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_def *coord = intr->src[1].ssa;
+ nir_def *index = intr->src[2].ssa;
+
+   /* image2DMS is treated by panfrost as if it were a 3D image, so
+    * the sample index is in src[2]. We need to put this into the
+    * Z component of the coordinates.
+    */
+ nir_src_rewrite(&intr->src[1],
+ nir_vector_insert_imm(b, coord,
+ nir_channel(b, index, 0),
+                                         2));
+ nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_3D);
+ return true;
+}
+
+bool
+pan_nir_lower_image_ms(nir_shader *shader)
+{
+ return nir_shader_intrinsics_pass(
+ shader, nir_lower_image_ms,
+ nir_metadata_block_index | nir_metadata_dominance, NULL);
+}
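
In effect the pass rewrites multisampled image access as follows (sketch of the NIR, not literal output):

    /* before:  image_load(dim=2D_MS, coord=(x, y, _), sample=s)
     * after:   image_load(dim=3D,    coord=(x, y, s))
     * the sample index is folded into the Z coordinate and the image is
     * addressed as a 3D image from then on */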
diff --git a/src/panfrost/util/pan_lower_sample_position.c b/src/panfrost/util/pan_lower_sample_position.c
index 12a0c47bbff..1bf4efd3de7 100644
--- a/src/panfrost/util/pan_lower_sample_position.c
+++ b/src/panfrost/util/pan_lower_sample_position.c
@@ -21,8 +21,8 @@
* SOFTWARE.
*/
-#include "pan_ir.h"
#include "compiler/nir/nir_builder.h"
+#include "pan_ir.h"
/* Sample positions are supplied in a packed 8:8 fixed-point vec2 format in GPU
* memory indexed by the sample. We lower in NIR to take advantage of possible
@@ -33,43 +33,38 @@
* it's a pretty trivial difference */
static bool
-pan_lower_sample_pos_impl(struct nir_builder *b,
- nir_instr *instr, UNUSED void *data)
+pan_lower_sample_pos_impl(struct nir_builder *b, nir_intrinsic_instr *intr,
+ UNUSED void *data)
{
- if (instr->type != nir_instr_type_intrinsic)
- return false;
-
- nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
- if (intr->intrinsic != nir_intrinsic_load_sample_pos)
- return false;
+ if (intr->intrinsic != nir_intrinsic_load_sample_pos)
+ return false;
- b->cursor = nir_before_instr(instr);
+ b->cursor = nir_before_instr(&intr->instr);
- /* Elements are 4 bytes */
- nir_ssa_def *addr = nir_iadd(b,
- nir_load_sample_positions_pan(b),
- nir_u2u64(b, nir_imul_imm(b, nir_load_sample_id(b), 4)));
+ /* Elements are 4 bytes */
+ nir_def *addr =
+ nir_iadd(b, nir_load_sample_positions_pan(b),
+ nir_u2u64(b, nir_imul_imm(b, nir_load_sample_id(b), 4)));
- /* Decode 8:8 fixed-point */
- nir_ssa_def *raw = nir_load_global(b, addr, 2, 2, 16);
- nir_ssa_def *decoded = nir_fmul_imm(b, nir_i2f16(b, raw), 1.0 / 256.0);
+ /* Decode 8:8 fixed-point */
+ nir_def *raw = nir_load_global(b, addr, 2, 2, 16);
+ nir_def *decoded = nir_fmul_imm(b, nir_i2f16(b, raw), 1.0 / 256.0);
- /* Make NIR validator happy */
- if (decoded->bit_size != nir_dest_bit_size(intr->dest))
- decoded = nir_f2fN(b, decoded, nir_dest_bit_size(intr->dest));
+ /* Make NIR validator happy */
+ if (decoded->bit_size != intr->def.bit_size)
+ decoded = nir_f2fN(b, decoded, intr->def.bit_size);
- nir_ssa_def_rewrite_uses(&intr->dest.ssa, decoded);
- return true;
+ nir_def_rewrite_uses(&intr->def, decoded);
+ return true;
}
bool
pan_lower_sample_pos(nir_shader *shader)
{
- if (shader->info.stage != MESA_SHADER_FRAGMENT)
- return false;
+ if (shader->info.stage != MESA_SHADER_FRAGMENT)
+ return false;
- return nir_shader_instructions_pass(shader,
- pan_lower_sample_pos_impl,
- nir_metadata_block_index | nir_metadata_dominance,
- NULL);
+ return nir_shader_intrinsics_pass(
+ shader, pan_lower_sample_pos_impl,
+ nir_metadata_block_index | nir_metadata_dominance, NULL);
}
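
A quick decode example for the 8:8 fixed-point positions described above (illustrative): each sample occupies 4 bytes (16-bit x, 16-bit y), and a raw component of 0x80 (128) decodes to 128 / 256 = 0.5, i.e. the centre of the pixel.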
diff --git a/src/panfrost/util/pan_lower_store_component.c b/src/panfrost/util/pan_lower_store_component.c
new file mode 100644
index 00000000000..d591aa9234d
--- /dev/null
+++ b/src/panfrost/util/pan_lower_store_component.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2020-2022 Collabora Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors (Collabora):
+ * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+ */
+
+#include "compiler/nir/nir_builder.h"
+#include "pan_ir.h"
+
+/*
+ * If the shader packs multiple varyings into the same location with different
+ * location_frac, we'll need to lower to a single varying store that collects
+ * all of the channels together. This is because the varying instruction on
+ * Midgard and Bifrost is slot-based, writing out an entire vec4 slot at a time.
+ */
+static bool
+lower_store_component(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ if (intr->intrinsic != nir_intrinsic_store_output)
+ return false;
+
+ struct hash_table_u64 *slots = data;
+ unsigned component = nir_intrinsic_component(intr);
+ nir_src *slot_src = nir_get_io_offset_src(intr);
+ uint64_t slot = nir_src_as_uint(*slot_src) + nir_intrinsic_base(intr);
+
+ nir_intrinsic_instr *prev = _mesa_hash_table_u64_search(slots, slot);
+ unsigned mask = (prev ? nir_intrinsic_write_mask(prev) : 0);
+
+ nir_def *value = intr->src[0].ssa;
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_def *undef = nir_undef(b, 1, value->bit_size);
+ nir_def *channels[4] = {undef, undef, undef, undef};
+
+ /* Copy old */
+ u_foreach_bit(i, mask) {
+ assert(prev != NULL);
+ nir_def *prev_ssa = prev->src[0].ssa;
+ channels[i] = nir_channel(b, prev_ssa, i);
+ }
+
+ /* Copy new */
+ unsigned new_mask = nir_intrinsic_write_mask(intr);
+ mask |= (new_mask << component);
+
+ u_foreach_bit(i, new_mask) {
+ assert(component + i < 4);
+ channels[component + i] = nir_channel(b, value, i);
+ }
+
+ intr->num_components = util_last_bit(mask);
+ nir_src_rewrite(&intr->src[0], nir_vec(b, channels, intr->num_components));
+
+ nir_intrinsic_set_component(intr, 0);
+ nir_intrinsic_set_write_mask(intr, mask);
+
+ if (prev) {
+ _mesa_hash_table_u64_remove(slots, slot);
+ nir_instr_remove(&prev->instr);
+ }
+
+ _mesa_hash_table_u64_insert(slots, slot, intr);
+ return false;
+}
+
+bool
+pan_nir_lower_store_component(nir_shader *s)
+{
+ assert(s->info.stage == MESA_SHADER_VERTEX);
+
+ struct hash_table_u64 *stores = _mesa_hash_table_u64_create(NULL);
+ bool progress = nir_shader_intrinsics_pass(
+ s, lower_store_component,
+ nir_metadata_block_index | nir_metadata_dominance, stores);
+ _mesa_hash_table_u64_destroy(stores);
+ return progress;
+}
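
Illustrative before/after for the merging performed above (hypothetical NIR, not taken from the patch):

    /* before: two partial stores of the same vec4 slot
     *   store_output(vec2(a, b), base=1, component=0, write_mask=0x3)
     *   store_output(c,          base=1, component=2, write_mask=0x1)
     * after: one slot-wide store collecting all written channels
     *   store_output(vec3(a, b, c), base=1, component=0, write_mask=0x7) */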
diff --git a/src/panfrost/util/pan_lower_writeout.c b/src/panfrost/util/pan_lower_writeout.c
index 7c298208515..eb528ce3bf3 100644
--- a/src/panfrost/util/pan_lower_writeout.c
+++ b/src/panfrost/util/pan_lower_writeout.c
@@ -22,8 +22,8 @@
* SOFTWARE.
*/
-#include "pan_ir.h"
#include "compiler/nir/nir_builder.h"
+#include "pan_ir.h"
/* Midgard can write all of color, depth and stencil in a single writeout
* operation, so we merge depth/stencil stores with color stores.
@@ -33,224 +33,158 @@
* +ZS_EMIT with respect to +ATEST and +BLEND, as well as combining
* depth/stencil stores into a single +ZS_EMIT op.
*/
-bool
-pan_nir_lower_zs_store(nir_shader *nir)
-{
- if (nir->info.stage != MESA_SHADER_FRAGMENT)
- return false;
-
- nir_variable *z_var = NULL, *s_var = NULL;
-
- nir_foreach_shader_out_variable(var, nir) {
- if (var->data.location == FRAG_RESULT_DEPTH)
- z_var = var;
- else if (var->data.location == FRAG_RESULT_STENCIL)
- s_var = var;
- }
-
- if (!z_var && !s_var)
- return false;
-
- bool progress = false;
-
- nir_foreach_function(function, nir) {
- if (!function->impl) continue;
-
- nir_intrinsic_instr *z_store = NULL, *s_store = NULL;
-
- nir_foreach_block(block, function->impl) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
- if (intr->intrinsic != nir_intrinsic_store_output)
- continue;
-
- if (z_var && nir_intrinsic_base(intr) == z_var->data.driver_location) {
- assert(!z_store);
- z_store = intr;
- }
-
- if (s_var && nir_intrinsic_base(intr) == s_var->data.driver_location) {
- assert(!s_store);
- s_store = intr;
- }
- }
- }
-
- if (!z_store && !s_store) continue;
-
- bool replaced = false;
-
- nir_foreach_block(block, function->impl) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
- if (intr->intrinsic != nir_intrinsic_store_output)
- continue;
-
- const nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_out, nir_intrinsic_base(intr));
- assert(var);
-
- if (var->data.location < FRAG_RESULT_DATA0)
- continue;
- if (var->data.index)
- continue;
-
- assert(nir_src_is_const(intr->src[1]) && "no indirect outputs");
-
- nir_builder b;
- nir_builder_init(&b, function->impl);
-
- assert(!z_store || z_store->instr.block == instr->block);
- assert(!s_store || s_store->instr.block == instr->block);
- b.cursor = nir_after_block_before_jump(instr->block);
-
- nir_intrinsic_instr *combined_store;
- combined_store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_combined_output_pan);
-
- combined_store->num_components = intr->src[0].ssa->num_components;
-
- nir_intrinsic_set_base(combined_store, nir_intrinsic_base(intr));
- nir_intrinsic_set_src_type(combined_store, nir_intrinsic_src_type(intr));
-
- unsigned writeout = PAN_WRITEOUT_C;
- if (z_store)
- writeout |= PAN_WRITEOUT_Z;
- if (s_store)
- writeout |= PAN_WRITEOUT_S;
-
- nir_intrinsic_set_component(combined_store, writeout);
+/*
+ * Get the type to report for a piece of a combined store, given the store it
+ * is combining from. If there is no store to render target #0, a dummy <0.0,
+ * 0.0, 0.0, 0.0> write is used, so report a matching float32 type.
+ */
+static nir_alu_type
+pan_nir_rt_store_type(nir_intrinsic_instr *store)
+{
+ return store ? nir_intrinsic_src_type(store) : nir_type_float32;
+}
- struct nir_ssa_def *zero = nir_imm_int(&b, 0);
+static void
+pan_nir_emit_combined_store(nir_builder *b, nir_intrinsic_instr *rt0_store,
+ unsigned writeout, nir_intrinsic_instr **stores)
+{
+ nir_intrinsic_instr *intr = nir_intrinsic_instr_create(
+ b->shader, nir_intrinsic_store_combined_output_pan);
- struct nir_ssa_def *src[4] = {
- intr->src[0].ssa,
- intr->src[1].ssa,
- z_store ? z_store->src[0].ssa : zero,
- s_store ? s_store->src[0].ssa : zero,
- };
+ intr->num_components = rt0_store ? rt0_store->src[0].ssa->num_components : 4;
- for (int i = 0; i < 4; ++i)
- combined_store->src[i] = nir_src_for_ssa(src[i]);
+ if (rt0_store)
+ nir_intrinsic_set_io_semantics(intr,
+ nir_intrinsic_io_semantics(rt0_store));
+ nir_intrinsic_set_src_type(intr, pan_nir_rt_store_type(rt0_store));
+ nir_intrinsic_set_dest_type(intr, pan_nir_rt_store_type(stores[2]));
+ nir_intrinsic_set_component(intr, writeout);
- nir_builder_instr_insert(&b, &combined_store->instr);
+ nir_def *zero = nir_imm_int(b, 0);
+ nir_def *zero4 = nir_imm_ivec4(b, 0, 0, 0, 0);
- nir_instr_remove(instr);
+ nir_def *src[] = {
+ rt0_store ? rt0_store->src[0].ssa : zero4,
+ rt0_store ? rt0_store->src[1].ssa : zero,
+ stores[0] ? stores[0]->src[0].ssa : zero,
+ stores[1] ? stores[1]->src[0].ssa : zero,
+ stores[2] ? stores[2]->src[0].ssa : zero4,
+ };
- replaced = true;
- }
- }
+ for (int i = 0; i < ARRAY_SIZE(src); ++i)
+ intr->src[i] = nir_src_for_ssa(src[i]);
- /* Insert a store to the depth RT (0xff) if needed */
- if (!replaced) {
- nir_builder b;
- nir_builder_init(&b, function->impl);
+ nir_builder_instr_insert(b, &intr->instr);
+}
+bool
+pan_nir_lower_zs_store(nir_shader *nir)
+{
+ bool progress = false;
- nir_block *block = NULL;
- if (z_store && s_store)
- assert(z_store->instr.block == s_store->instr.block);
+ if (nir->info.stage != MESA_SHADER_FRAGMENT)
+ return false;
- if (z_store)
- block = z_store->instr.block;
- else
- block = s_store->instr.block;
+ nir_foreach_function_impl(impl, nir) {
+ nir_intrinsic_instr *stores[3] = {NULL};
+ unsigned writeout = 0;
- b.cursor = nir_after_block_before_jump(block);
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
- nir_intrinsic_instr *combined_store;
- combined_store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_combined_output_pan);
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_store_output)
+ continue;
- combined_store->num_components = 4;
+ nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+ if (sem.location == FRAG_RESULT_DEPTH) {
+ stores[0] = intr;
+ writeout |= PAN_WRITEOUT_Z;
+ } else if (sem.location == FRAG_RESULT_STENCIL) {
+ stores[1] = intr;
+ writeout |= PAN_WRITEOUT_S;
+ } else if (sem.dual_source_blend_index) {
+ assert(!stores[2]); /* there should be only 1 source for dual blending */
+ stores[2] = intr;
+ writeout |= PAN_WRITEOUT_2;
+ }
+ }
+ }
- unsigned base;
- if (z_store)
- base = nir_intrinsic_base(z_store);
- else
- base = nir_intrinsic_base(s_store);
- nir_intrinsic_set_base(combined_store, base);
- nir_intrinsic_set_src_type(combined_store, nir_type_float32);
+ if (!writeout)
+ continue;
- unsigned writeout = 0;
- if (z_store)
- writeout |= PAN_WRITEOUT_Z;
- if (s_store)
- writeout |= PAN_WRITEOUT_S;
+ nir_block *common_block = NULL;
- nir_intrinsic_set_component(combined_store, writeout);
+ /* Ensure all stores are in the same block */
+ for (unsigned i = 0; i < ARRAY_SIZE(stores); ++i) {
+ if (!stores[i])
+ continue;
- struct nir_ssa_def *zero = nir_imm_int(&b, 0);
+ nir_block *block = stores[i]->instr.block;
- struct nir_ssa_def *src[4] = {
- nir_imm_vec4(&b, 0, 0, 0, 0),
- zero,
- z_store ? z_store->src[0].ssa : zero,
- s_store ? s_store->src[0].ssa : zero,
- };
+ if (common_block)
+ assert(common_block == block);
+ else
+ common_block = block;
+ }
- for (int i = 0; i < 4; ++i)
- combined_store->src[i] = nir_src_for_ssa(src[i]);
+ bool replaced = false;
- nir_builder_instr_insert(&b, &combined_store->instr);
- }
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
- if (z_store)
- nir_instr_remove(&z_store->instr);
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_store_output)
+ continue;
- if (s_store)
- nir_instr_remove(&s_store->instr);
+ nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
- nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
- progress = true;
- }
+ if (sem.location < FRAG_RESULT_DATA0)
+ continue;
- return progress;
-}
+ if (sem.dual_source_blend_index)
+ continue;
-/* Real writeout stores, which break execution, need to be moved to after
- * dual-source stores, which are just standard register writes. */
-bool
-pan_nir_reorder_writeout(nir_shader *nir)
-{
- bool progress = false;
+ assert(nir_src_is_const(intr->src[1]) && "no indirect outputs");
- nir_foreach_function(function, nir) {
- if (!function->impl) continue;
+ nir_builder b =
+ nir_builder_at(nir_after_block_before_jump(instr->block));
- nir_foreach_block(block, function->impl) {
- nir_instr *last_writeout = NULL;
+ /* Trying to write depth twice results in the
+ * wrong blend shader being executed on
+ * Midgard */
+ unsigned this_store = PAN_WRITEOUT_C | (replaced ? 0 : writeout);
- nir_foreach_instr_reverse_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
+ pan_nir_emit_combined_store(&b, intr, this_store, stores);
- nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
- if (intr->intrinsic != nir_intrinsic_store_output)
- continue;
+ nir_instr_remove(instr);
- const nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_out, nir_intrinsic_base(intr));
+ replaced = true;
+ }
+ }
- if (var->data.index) {
- if (!last_writeout)
- last_writeout = instr;
- continue;
- }
+ /* Insert a store to the depth RT (0xff) if needed */
+ if (!replaced) {
+ nir_builder b =
+ nir_builder_at(nir_after_block_before_jump(common_block));
- if (!last_writeout)
- continue;
+ pan_nir_emit_combined_store(&b, NULL, writeout, stores);
+ }
- /* This is a real store, so move it to after dual-source stores */
- exec_node_remove(&instr->node);
- exec_node_insert_after(&last_writeout->node, &instr->node);
+ for (unsigned i = 0; i < ARRAY_SIZE(stores); ++i) {
+ if (stores[i])
+ nir_instr_remove(&stores[i]->instr);
+ }
- progress = true;
- }
- }
- }
+ nir_metadata_preserve(impl,
+ nir_metadata_block_index | nir_metadata_dominance);
+ progress = true;
+ }
- return progress;
+ return progress;
}
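Editor's note: a minimal standalone sketch (not part of the patch) of the flag bookkeeping the rewritten pass performs before calling pan_nir_emit_combined_store(): only the first colour store is combined with the depth/stencil/dual-source payload, and later stores carry colour alone. The PAN_WRITEOUT_* values are assumed to mirror pan_ir.h, and collect_writeout_mask() is a hypothetical helper used only for illustration.

/* Editor's sketch; flag values assumed, helper name illustrative only. */
#include <stdbool.h>
#include <stdio.h>

#define PAN_WRITEOUT_C (1 << 0) /* colour */
#define PAN_WRITEOUT_Z (1 << 1) /* depth */
#define PAN_WRITEOUT_S (1 << 2) /* stencil */
#define PAN_WRITEOUT_2 (1 << 3) /* dual-source colour */

static unsigned
collect_writeout_mask(bool has_z, bool has_s, bool has_dual)
{
   unsigned writeout = 0;
   if (has_z)
      writeout |= PAN_WRITEOUT_Z;
   if (has_s)
      writeout |= PAN_WRITEOUT_S;
   if (has_dual)
      writeout |= PAN_WRITEOUT_2;
   return writeout;
}

int
main(void)
{
   unsigned writeout = collect_writeout_mask(true, true, false);
   bool replaced = false;

   /* Only the first colour store carries the Z/S/dual payload; writing
    * depth twice would pick the wrong blend shader on Midgard. */
   for (int rt = 0; rt < 2; ++rt) {
      unsigned this_store = PAN_WRITEOUT_C | (replaced ? 0 : writeout);
      printf("store %d: mask 0x%x\n", rt, this_store);
      replaced = true;
   }
   return 0;
}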
diff --git a/src/panfrost/util/pan_lower_xfb.c b/src/panfrost/util/pan_lower_xfb.c
new file mode 100644
index 00000000000..3fc96b2e380
--- /dev/null
+++ b/src/panfrost/util/pan_lower_xfb.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2022 Collabora Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler/nir/nir_builder.h"
+#include "pan_ir.h"
+
+static void
+lower_xfb_output(nir_builder *b, nir_intrinsic_instr *intr,
+ unsigned start_component, unsigned num_components,
+ unsigned buffer, unsigned offset_words)
+{
+ assert(buffer < MAX_XFB_BUFFERS);
+ assert(nir_intrinsic_component(intr) == 0); // TODO
+
+ /* Transform feedback info in units of words, convert to bytes. */
+ uint16_t stride = b->shader->info.xfb_stride[buffer] * 4;
+ assert(stride != 0);
+
+ uint16_t offset = offset_words * 4;
+
+ nir_def *index = nir_iadd(
+ b, nir_imul(b, nir_load_instance_id(b), nir_load_num_vertices(b)),
+ nir_load_vertex_id_zero_base(b));
+
+ BITSET_SET(b->shader->info.system_values_read,
+ SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
+ BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
+
+ nir_def *buf = nir_load_xfb_address(b, 64, .base = buffer);
+ nir_def *addr = nir_iadd(
+ b, buf,
+ nir_u2u64(b, nir_iadd_imm(b, nir_imul_imm(b, index, stride), offset)));
+
+ nir_def *src = intr->src[0].ssa;
+ nir_def *value =
+ nir_channels(b, src, BITFIELD_MASK(num_components) << start_component);
+ nir_store_global(b, addr, 4, value, BITFIELD_MASK(num_components));
+}
+
+static bool
+lower_xfb(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
+{
+ /* In transform feedback programs, vertex ID becomes zero-based, so apply
+ * that lowering even on Valhall.
+ */
+ if (intr->intrinsic == nir_intrinsic_load_vertex_id) {
+ b->cursor = nir_instr_remove(&intr->instr);
+
+ nir_def *repl =
+ nir_iadd(b, nir_load_vertex_id_zero_base(b), nir_load_first_vertex(b));
+
+ nir_def_rewrite_uses(&intr->def, repl);
+ return true;
+ }
+
+ if (intr->intrinsic != nir_intrinsic_store_output)
+ return false;
+
+ bool progress = false;
+
+ b->cursor = nir_before_instr(&intr->instr);
+
+ for (unsigned i = 0; i < 2; ++i) {
+ nir_io_xfb xfb =
+ i ? nir_intrinsic_io_xfb2(intr) : nir_intrinsic_io_xfb(intr);
+ for (unsigned j = 0; j < 2; ++j) {
+ if (!xfb.out[j].num_components)
+ continue;
+
+ lower_xfb_output(b, intr, i * 2 + j, xfb.out[j].num_components,
+ xfb.out[j].buffer, xfb.out[j].offset);
+ progress = true;
+ }
+ }
+
+ nir_instr_remove(&intr->instr);
+ return progress;
+}
+
+bool
+pan_lower_xfb(nir_shader *nir)
+{
+ return nir_shader_intrinsics_pass(
+ nir, lower_xfb, nir_metadata_block_index | nir_metadata_dominance, NULL);
+}
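Editor's note: a minimal standalone sketch (not part of the patch) of the address arithmetic that lower_xfb_output() emits as NIR, written as plain C. xfb_store_address() and its parameters are illustrative only; the word-to-byte scaling and the (instance_id * num_vertices + vertex_id) indexing follow the pass above.

/* Editor's sketch; helper name and parameters are illustrative only. */
#include <inttypes.h>
#include <stdio.h>

static uint64_t
xfb_store_address(uint64_t buffer_base, uint32_t vertex_id,
                  uint32_t instance_id, uint32_t num_vertices,
                  uint16_t stride_words, uint16_t offset_words)
{
   /* Transform feedback strides and offsets are in 32-bit words;
    * the pass scales them to bytes before forming the address. */
   uint32_t stride = stride_words * 4;
   uint32_t offset = offset_words * 4;

   /* One record per vertex of every instance. */
   uint32_t index = instance_id * num_vertices + vertex_id;

   return buffer_base + (uint64_t)index * stride + offset;
}

int
main(void)
{
   /* Vertex 2 of instance 1, 3 vertices per instance, 4-word stride. */
   printf("0x%" PRIx64 "\n", xfb_store_address(0x1000, 2, 1, 3, 4, 0));
   return 0;
}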
diff --git a/src/panfrost/util/pan_sysval.c b/src/panfrost/util/pan_sysval.c
deleted file mode 100644
index 80a509f5b8b..00000000000
--- a/src/panfrost/util/pan_sysval.c
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright (C) 2020 Collabora Ltd.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors (Collabora):
- * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
- */
-
-#include "pan_ir.h"
-#include "compiler/nir/nir_builder.h"
-
-/* TODO: ssbo_size */
-static int
-panfrost_sysval_for_ssbo(nir_intrinsic_instr *instr)
-{
- nir_src index = instr->src[0];
- assert(nir_src_is_const(index));
- uint32_t uindex = nir_src_as_uint(index);
-
- return PAN_SYSVAL(SSBO, uindex);
-}
-
-static int
-panfrost_sysval_for_sampler(nir_intrinsic_instr *instr)
-{
- /* TODO: indirect samplers !!! */
- nir_src index = instr->src[0];
- assert(nir_src_is_const(index));
- uint32_t uindex = nir_src_as_uint(index);
-
- return PAN_SYSVAL(SAMPLER, uindex);
-}
-
-static int
-panfrost_sysval_for_image_size(nir_intrinsic_instr *instr)
-{
- nir_src index = instr->src[0];
- assert(nir_src_is_const(index));
-
- bool is_array = nir_intrinsic_image_array(instr);
- uint32_t uindex = nir_src_as_uint(index);
- unsigned dim = nir_intrinsic_dest_components(instr) - is_array;
-
- return PAN_SYSVAL(IMAGE_SIZE, PAN_TXS_SYSVAL_ID(uindex, dim, is_array));
-}
-
-static unsigned
-panfrost_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr)
-{
- switch (instr->intrinsic) {
- case nir_intrinsic_load_viewport_scale:
- return PAN_SYSVAL_VIEWPORT_SCALE;
- case nir_intrinsic_load_viewport_offset:
- return PAN_SYSVAL_VIEWPORT_OFFSET;
- case nir_intrinsic_load_num_workgroups:
- return PAN_SYSVAL_NUM_WORK_GROUPS;
- case nir_intrinsic_load_workgroup_size:
- return PAN_SYSVAL_LOCAL_GROUP_SIZE;
- case nir_intrinsic_load_work_dim:
- return PAN_SYSVAL_WORK_DIM;
- case nir_intrinsic_load_sample_positions_pan:
- return PAN_SYSVAL_SAMPLE_POSITIONS;
- case nir_intrinsic_load_first_vertex:
- case nir_intrinsic_load_base_vertex:
- case nir_intrinsic_load_base_instance:
- return PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS;
- case nir_intrinsic_load_draw_id:
- return PAN_SYSVAL_DRAWID;
- case nir_intrinsic_load_ssbo_address:
- case nir_intrinsic_get_ssbo_size:
- return panfrost_sysval_for_ssbo(instr);
- case nir_intrinsic_load_sampler_lod_parameters_pan:
- return panfrost_sysval_for_sampler(instr);
- case nir_intrinsic_image_size:
- return panfrost_sysval_for_image_size(instr);
- default:
- return ~0;
- }
-}
-
-int
-panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest)
-{
- nir_intrinsic_instr *intr;
- nir_dest *dst = NULL;
- nir_tex_instr *tex;
- unsigned sysval = ~0;
-
- switch (instr->type) {
- case nir_instr_type_intrinsic:
- intr = nir_instr_as_intrinsic(instr);
- sysval = panfrost_nir_sysval_for_intrinsic(intr);
- dst = &intr->dest;
- break;
- case nir_instr_type_tex:
- tex = nir_instr_as_tex(instr);
- if (tex->op != nir_texop_txs)
- break;
-
- sysval = PAN_SYSVAL(TEXTURE_SIZE,
- PAN_TXS_SYSVAL_ID(tex->texture_index,
- nir_tex_instr_dest_size(tex) -
- (tex->is_array ? 1 : 0),
- tex->is_array));
- dst = &tex->dest;
- break;
- default:
- break;
- }
-
- if (dest && dst)
- *dest = *dst;
-
- return sysval;
-}
-
-unsigned
-pan_lookup_sysval(struct hash_table_u64 *sysval_to_id,
- struct panfrost_sysvals *sysvals,
- int sysval)
-{
- /* Try to lookup */
-
- void *cached = _mesa_hash_table_u64_search(sysval_to_id, sysval);
-
- if (cached)
- return ((uintptr_t) cached) - 1;
-
- /* Else assign */
-
- unsigned id = sysvals->sysval_count++;
- assert(id < MAX_SYSVAL_COUNT);
- _mesa_hash_table_u64_insert(sysval_to_id, sysval, (void *) ((uintptr_t) id + 1));
- sysvals->sysvals[id] = sysval;
-
- return id;
-}
-
-struct hash_table_u64 *
-panfrost_init_sysvals(struct panfrost_sysvals *sysvals, void *memctx)
-{
- sysvals->sysval_count = 0;
- return _mesa_hash_table_u64_create(memctx);
-}
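Editor's note: the removed pan_lookup_sysval() used a lookup-or-assign pattern, storing id + 1 in the hash table so that a NULL result can mean "not present". A minimal standalone sketch of that pattern follows, with a plain array standing in for the Mesa hash table; struct sysval_table, lookup_or_assign(), and the table size are illustrative assumptions, not the removed code itself.

/* Editor's sketch of the lookup-or-assign pattern; names and sizes assumed. */
#include <stdio.h>

#define SKETCH_MAX_SYSVALS 32

struct sysval_table {
   unsigned count;
   int keys[SKETCH_MAX_SYSVALS]; /* sysval identifiers, in assignment order */
};

static unsigned
lookup_or_assign(struct sysval_table *t, int sysval)
{
   /* Try to look up an existing slot first (the original consulted a
    * hash table keyed on the sysval, storing id + 1 as the value). */
   for (unsigned i = 0; i < t->count; ++i) {
      if (t->keys[i] == sysval)
         return i;
   }

   /* Else assign the next free slot. */
   unsigned id = t->count++;
   t->keys[id] = sysval;
   return id;
}

int
main(void)
{
   struct sysval_table t = {0};
   /* Prints "0 1 0": repeated sysvals reuse their assigned slot. */
   printf("%u %u %u\n", lookup_or_assign(&t, 7), lookup_or_assign(&t, 9),
          lookup_or_assign(&t, 7));
   return 0;
}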