Diffstat (limited to 'src/panfrost/lib/pan_blitter.c')
-rw-r--r--  src/panfrost/lib/pan_blitter.c | 2746
1 file changed, 1486 insertions(+), 1260 deletions(-)
diff --git a/src/panfrost/lib/pan_blitter.c b/src/panfrost/lib/pan_blitter.c
index a17fab7dee1..b8c885b3edb 100644
--- a/src/panfrost/lib/pan_blitter.c
+++ b/src/panfrost/lib/pan_blitter.c
@@ -25,19 +25,18 @@
* Boris Brezillon <boris.brezillon@collabora.com>
*/
+#include "pan_blitter.h"
#include <math.h>
#include <stdio.h>
+#include "compiler/nir/nir_builder.h"
+#include "util/u_math.h"
#include "pan_blend.h"
-#include "pan_blitter.h"
-#include "pan_cs.h"
+#include "pan_desc.h"
#include "pan_encoder.h"
+#include "pan_jc.h"
#include "pan_pool.h"
#include "pan_shader.h"
-#include "pan_scoreboard.h"
#include "pan_texture.h"
-#include "panfrost-quirks.h"
-#include "compiler/nir/nir_builder.h"
-#include "util/u_math.h"
#if PAN_ARCH >= 6
/* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
@@ -48,1449 +47,1676 @@
* This is primarily designed as a fallback for preloads but could be extended
* for other clears/blits if needed in the future. */
-static enum mali_bifrost_register_file_format
+static enum mali_register_file_format
blit_type_to_reg_fmt(nir_alu_type in)
{
- switch (in) {
- case nir_type_float32:
- return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
- case nir_type_int32:
- return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
- case nir_type_uint32:
- return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
- default:
- unreachable("Invalid blit type");
- }
+ switch (in) {
+ case nir_type_float32:
+ return MALI_REGISTER_FILE_FORMAT_F32;
+ case nir_type_int32:
+ return MALI_REGISTER_FILE_FORMAT_I32;
+ case nir_type_uint32:
+ return MALI_REGISTER_FILE_FORMAT_U32;
+ default:
+ unreachable("Invalid blit type");
+ }
}
#endif
+/* On Valhall, the driver gives the hardware a table of resource tables.
+ * Resources are addressed as the index of the table together with the index of
+ * the resource within the table. For simplicity, we put one type of resource
+ * in each table and fix the numbering of the tables.
+ *
+ * This numbering is arbitrary.
+ */
+enum pan_blit_resource_table {
+ PAN_BLIT_TABLE_ATTRIBUTE = 0,
+ PAN_BLIT_TABLE_ATTRIBUTE_BUFFER,
+ PAN_BLIT_TABLE_SAMPLER,
+ PAN_BLIT_TABLE_TEXTURE,
+
+ PAN_BLIT_NUM_RESOURCE_TABLES
+};
+
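
As a rough illustration of this addressing scheme (a sketch, not the authoritative encoding): pan_res_handle(), used by the *_hw_index() helpers later in this file, pairs one of these table indices with an index into that table, conceptually along these lines:

   /* Hypothetical encoding: table number in the high bits, resource index
    * within that table in the low bits. The real layout is whatever
    * pan_res_handle() defines. */
   static inline uint32_t
   blit_res_handle_sketch(enum pan_blit_resource_table table, uint32_t index)
   {
      return ((uint32_t)table << 24) | index;
   }

   /* e.g. the first sampler: blit_res_handle_sketch(PAN_BLIT_TABLE_SAMPLER, 0) */
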
struct pan_blit_surface {
- gl_frag_result loc : 4;
- nir_alu_type type : 8;
- enum mali_texture_dimension dim : 2;
- bool array : 1;
- unsigned src_samples: 5;
- unsigned dst_samples: 5;
+ gl_frag_result loc : 4;
+ nir_alu_type type : 8;
+ enum mali_texture_dimension dim : 2;
+ bool array : 1;
+ unsigned src_samples : 5;
+ unsigned dst_samples : 5;
};
struct pan_blit_shader_key {
- struct pan_blit_surface surfaces[8];
+ struct pan_blit_surface surfaces[8];
};
struct pan_blit_shader_data {
- struct pan_blit_shader_key key;
- mali_ptr address;
- unsigned blend_ret_offsets[8];
- nir_alu_type blend_types[8];
+ struct pan_blit_shader_key key;
+ struct pan_shader_info info;
+ mali_ptr address;
+ unsigned blend_ret_offsets[8];
+ nir_alu_type blend_types[8];
};
struct pan_blit_blend_shader_key {
- enum pipe_format format;
- nir_alu_type type;
- unsigned rt : 3;
- unsigned nr_samples : 5;
- unsigned pad : 24;
+ enum pipe_format format;
+ nir_alu_type type;
+ unsigned rt : 3;
+ unsigned nr_samples : 5;
+ unsigned pad : 24;
};
struct pan_blit_blend_shader_data {
- struct pan_blit_blend_shader_key key;
- mali_ptr address;
+ struct pan_blit_blend_shader_key key;
+ mali_ptr address;
};
struct pan_blit_rsd_key {
- struct {
- enum pipe_format format;
- nir_alu_type type : 8;
- unsigned src_samples : 5;
- unsigned dst_samples : 5;
- enum mali_texture_dimension dim : 2;
- bool array : 1;
- } rts[8], z, s;
+ struct {
+ enum pipe_format format;
+ nir_alu_type type : 8;
+ unsigned src_samples : 5;
+ unsigned dst_samples : 5;
+ enum mali_texture_dimension dim : 2;
+ bool array : 1;
+ } rts[8], z, s;
};
struct pan_blit_rsd_data {
- struct pan_blit_rsd_key key;
- mali_ptr address;
+ struct pan_blit_rsd_key key;
+ mali_ptr address;
};
#if PAN_ARCH >= 5
static void
-pan_blitter_emit_blend(const struct panfrost_device *dev,
- unsigned rt,
+pan_blitter_emit_blend(unsigned rt,
const struct pan_image_view *iview,
const struct pan_blit_shader_data *blit_shader,
- mali_ptr blend_shader,
- void *out)
+ mali_ptr blend_shader, void *out)
{
- pan_pack(out, BLEND, cfg) {
- if (!iview) {
- cfg.enable = false;
+ assert(blend_shader == 0 || PAN_ARCH <= 5);
+
+ pan_pack(out, BLEND, cfg) {
+ if (!iview) {
+ cfg.enable = false;
#if PAN_ARCH >= 6
- cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OFF;
+ cfg.internal.mode = MALI_BLEND_MODE_OFF;
#endif
- continue;
- }
+ continue;
+ }
- cfg.round_to_fb_precision = true;
- cfg.srgb = util_format_is_srgb(iview->format);
+ cfg.round_to_fb_precision = true;
+ cfg.srgb = util_format_is_srgb(iview->format);
#if PAN_ARCH >= 6
- cfg.bifrost.internal.mode = blend_shader ?
- MALI_BIFROST_BLEND_MODE_SHADER :
- MALI_BIFROST_BLEND_MODE_OPAQUE;
+ cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
#endif
- if (!blend_shader) {
+ if (!blend_shader) {
+ cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
+ cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
+ cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
+ cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
+ cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
+ cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
+ cfg.equation.color_mask = 0xf;
+
#if PAN_ARCH >= 6
- nir_alu_type type = blit_shader->key.surfaces[rt].type;
-
- cfg.bifrost.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
- cfg.bifrost.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
- cfg.bifrost.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
- cfg.bifrost.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
- cfg.bifrost.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
- cfg.bifrost.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
- cfg.bifrost.equation.color_mask = 0xf;
- cfg.bifrost.internal.fixed_function.num_comps = 4;
- cfg.bifrost.internal.fixed_function.conversion.memory_format =
- panfrost_format_to_bifrost_blend(dev, iview->format, false);
- cfg.bifrost.internal.fixed_function.conversion.register_format =
- blit_type_to_reg_fmt(type);
-
- cfg.bifrost.internal.fixed_function.rt = rt;
-#else
- cfg.midgard.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
- cfg.midgard.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
- cfg.midgard.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
- cfg.midgard.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
- cfg.midgard.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
- cfg.midgard.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
- cfg.midgard.equation.color_mask = 0xf;
+ nir_alu_type type = blit_shader->key.surfaces[rt].type;
+
+ cfg.internal.fixed_function.num_comps = 4;
+ cfg.internal.fixed_function.conversion.memory_format = GENX(
+ panfrost_dithered_format_from_pipe_format)(iview->format, false);
+ cfg.internal.fixed_function.conversion.register_format =
+ blit_type_to_reg_fmt(type);
+
+ cfg.internal.fixed_function.rt = rt;
#endif
- } else {
-#if PAN_ARCH >= 6
- cfg.bifrost.internal.shader.pc = blend_shader;
- if (blit_shader->blend_ret_offsets[rt]) {
- cfg.bifrost.internal.shader.return_value =
- blit_shader->address +
- blit_shader->blend_ret_offsets[rt];
- }
-#else
- cfg.midgard.blend_shader = true;
- cfg.midgard.shader_pc = blend_shader;
+ } else {
+#if PAN_ARCH <= 5
+ cfg.blend_shader = true;
+ cfg.shader_pc = blend_shader;
#endif
- }
- }
+ }
+ }
}
#endif
+struct pan_blitter_views {
+ unsigned rt_count;
+ const struct pan_image_view *src_rts[8];
+ const struct pan_image_view *dst_rts[8];
+ const struct pan_image_view *src_z;
+ const struct pan_image_view *dst_z;
+ const struct pan_image_view *src_s;
+ const struct pan_image_view *dst_s;
+};
+
+static bool
+pan_blitter_is_ms(struct pan_blitter_views *views)
+{
+ for (unsigned i = 0; i < views->rt_count; i++) {
+ if (views->dst_rts[i]) {
+ if (pan_image_view_get_nr_samples(views->dst_rts[i]) > 1)
+ return true;
+ }
+ }
+
+ if (views->dst_z && pan_image_view_get_nr_samples(views->dst_z) > 1)
+ return true;
+
+ if (views->dst_s && pan_image_view_get_nr_samples(views->dst_s) > 1)
+ return true;
+
+ return false;
+}
+
+#if PAN_ARCH >= 5
static void
-pan_blitter_emit_rsd(const struct panfrost_device *dev,
- const struct pan_blit_shader_data *blit_shader,
- unsigned rt_count,
- const struct pan_image_view **rts,
- mali_ptr *blend_shaders,
- const struct pan_image_view *z,
- const struct pan_image_view *s,
+pan_blitter_emit_blends(const struct pan_blit_shader_data *blit_shader,
+ struct pan_blitter_views *views,
+ mali_ptr *blend_shaders, void *out)
+{
+ for (unsigned i = 0; i < MAX2(views->rt_count, 1); ++i) {
+ void *dest = out + pan_size(BLEND) * i;
+ const struct pan_image_view *rt_view = views->dst_rts[i];
+ mali_ptr blend_shader = blend_shaders ? blend_shaders[i] : 0;
+
+ pan_blitter_emit_blend(i, rt_view, blit_shader, blend_shader, dest);
+ }
+}
+#endif
+
+#if PAN_ARCH <= 7
+static void
+pan_blitter_emit_rsd(const struct pan_blit_shader_data *blit_shader,
+ struct pan_blitter_views *views, mali_ptr *blend_shaders,
void *out)
{
- unsigned tex_count = 0;
- bool zs = (z || s);
- bool ms = false;
-
- for (unsigned i = 0; i < rt_count; i++) {
- if (rts[i]) {
- tex_count++;
- if (rts[i]->nr_samples > 1)
- ms = true;
- }
- }
-
- if (z) {
- if (z->image->layout.nr_samples > 1)
- ms = true;
- tex_count++;
- }
-
- if (s) {
- if (s->image->layout.nr_samples > 1)
- ms = true;
- tex_count++;
- }
-
- pan_pack(out, RENDERER_STATE, cfg) {
- assert(blit_shader->address);
- cfg.shader.shader = blit_shader->address;
- cfg.shader.varying_count = 1;
- cfg.shader.texture_count = tex_count;
- cfg.shader.sampler_count = 1;
-
- cfg.properties.stencil_from_shader = s != NULL;
- cfg.properties.depth_source =
- z ?
- MALI_DEPTH_SOURCE_SHADER :
- MALI_DEPTH_SOURCE_FIXED_FUNCTION;
-
- cfg.multisample_misc.sample_mask = 0xFFFF;
- cfg.multisample_misc.multisample_enable = ms;
- cfg.multisample_misc.evaluate_per_sample = ms;
- cfg.multisample_misc.depth_write_mask = z != NULL;
- cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
-
- cfg.stencil_mask_misc.stencil_enable = s != NULL;
- cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
- cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
- cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
- cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
- cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
- cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
- cfg.stencil_front.mask = 0xFF;
- cfg.stencil_back = cfg.stencil_front;
+ UNUSED bool zs = (views->dst_z || views->dst_s);
+ bool ms = pan_blitter_is_ms(views);
+
+ pan_pack(out, RENDERER_STATE, cfg) {
+ assert(blit_shader->address);
+ pan_shader_prepare_rsd(&blit_shader->info, blit_shader->address, &cfg);
+
+ cfg.multisample_misc.sample_mask = 0xFFFF;
+ cfg.multisample_misc.multisample_enable = ms;
+ cfg.multisample_misc.evaluate_per_sample = ms;
+ cfg.multisample_misc.depth_write_mask = views->dst_z != NULL;
+ cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
+
+ cfg.stencil_mask_misc.stencil_enable = views->dst_s != NULL;
+ cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
+ cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
+ cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
+ cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
+ cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
+ cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
+ cfg.stencil_front.mask = 0xFF;
+ cfg.stencil_back = cfg.stencil_front;
#if PAN_ARCH >= 6
- if (zs) {
- cfg.properties.bifrost.zs_update_operation =
- MALI_PIXEL_KILL_FORCE_LATE;
- cfg.properties.bifrost.pixel_kill_operation =
- MALI_PIXEL_KILL_FORCE_LATE;
- } else {
- cfg.properties.bifrost.zs_update_operation =
- MALI_PIXEL_KILL_STRONG_EARLY;
- cfg.properties.bifrost.pixel_kill_operation =
- MALI_PIXEL_KILL_FORCE_EARLY;
- }
-
- /* We can only allow blit shader fragments to kill if they write all
- * colour outputs. This is true for our colour (non-Z/S) blit shaders,
- * but obviously not true for Z/S shaders. However, blit shaders
- * otherwise lack side effects, so other fragments may kill them.
- * However, while shaders writing Z/S can normally be killed, on v6
- * for frame shaders it can cause GPU timeouts, so only allow colour
- * blit shaders to be killed. */
-
- cfg.properties.bifrost.allow_forward_pixel_to_kill = !zs;
- cfg.properties.bifrost.allow_forward_pixel_to_be_killed = (dev->arch >= 7) || !zs;
-
- cfg.preload.fragment.coverage = true;
- cfg.preload.fragment.sample_mask_id = ms;
+ if (zs) {
+ /* Writing Z/S requires late updates */
+ cfg.properties.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
+ cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
+ } else {
+ /* Skipping ATEST requires forcing Z/S */
+ cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
+ cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
+ }
+
+      /* While shaders writing Z/S can normally be killed, doing so for
+       * frame shaders on v6 can cause GPU timeouts, so only allow colour
+       * blit shaders to be killed. */
+ cfg.properties.allow_forward_pixel_to_kill = !zs;
+
+ if (PAN_ARCH == 6)
+ cfg.properties.allow_forward_pixel_to_be_killed = !zs;
#else
- mali_ptr blend_shader = blend_shaders ?
- panfrost_last_nonnull(blend_shaders, rt_count) : 0;
- cfg.properties.midgard.work_register_count = 4;
- cfg.properties.midgard.force_early_z = !zs;
- cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
+ mali_ptr blend_shader =
+ blend_shaders
+ ? panfrost_last_nonnull(blend_shaders, MAX2(views->rt_count, 1))
+ : 0;
- /* Set even on v5 for erratum workaround */
- cfg.sfbd_blend_shader = blend_shader;
-#if PAN_ARCH == 4
- cfg.stencil_mask_misc.sfbd_write_enable = true;
- cfg.stencil_mask_misc.sfbd_dither_disable = true;
- cfg.multisample_misc.sfbd_blend_shader = !!blend_shader;
- cfg.sfbd_blend_shader = blend_shader;
- if (!cfg.multisample_misc.sfbd_blend_shader) {
- cfg.sfbd_blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
- cfg.sfbd_blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
- cfg.sfbd_blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
- cfg.sfbd_blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
- cfg.sfbd_blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
- cfg.sfbd_blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
- cfg.sfbd_blend_constant = 0;
-
- if (rts && rts[0]) {
- cfg.stencil_mask_misc.sfbd_srgb =
- util_format_is_srgb(rts[0]->format);
- cfg.sfbd_blend_equation.color_mask = 0xf;
- }
- }
+ cfg.properties.work_register_count = 4;
+ cfg.properties.force_early_z = !zs;
+ cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
+
+ /* Set even on v5 for erratum workaround */
+#if PAN_ARCH == 5
+ cfg.legacy_blend_shader = blend_shader;
+#else
+ cfg.blend_shader = blend_shader;
+ cfg.stencil_mask_misc.write_enable = true;
+ cfg.stencil_mask_misc.dither_disable = true;
+ cfg.multisample_misc.blend_shader = !!blend_shader;
+ if (!cfg.multisample_misc.blend_shader) {
+ cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
+ cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
+ cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
+ cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
+ cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
+ cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
+ cfg.blend_constant = 0;
+
+ if (views->dst_rts[0] != NULL) {
+ cfg.stencil_mask_misc.srgb =
+ util_format_is_srgb(views->dst_rts[0]->format);
+ cfg.blend_equation.color_mask = 0xf;
+ }
+ }
#endif
#endif
- }
+ }
#if PAN_ARCH >= 5
- for (unsigned i = 0; i < MAX2(rt_count, 1); ++i) {
- void *dest = out + pan_size(RENDERER_STATE) + pan_size(BLEND) * i;
- const struct pan_image_view *rt_view = rts ? rts[i] : NULL;
- mali_ptr blend_shader = blend_shaders ? blend_shaders[i] : 0;
-
- pan_blitter_emit_blend(dev, i, rt_view, blit_shader,
- blend_shader, dest);
- }
+ pan_blitter_emit_blends(blit_shader, views, blend_shaders,
+ out + pan_size(RENDERER_STATE));
#endif
}
+#endif
+#if PAN_ARCH <= 5
static void
-pan_blitter_get_blend_shaders(struct panfrost_device *dev,
+pan_blitter_get_blend_shaders(struct pan_blitter_cache *cache,
unsigned rt_count,
const struct pan_image_view **rts,
const struct pan_blit_shader_data *blit_shader,
mali_ptr *blend_shaders)
{
- if (!rt_count)
- return;
-
- struct pan_blend_state blend_state = {
- .rt_count = rt_count,
- };
-
- for (unsigned i = 0; i < rt_count; i++) {
- if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal)
- continue;
-
- struct pan_blit_blend_shader_key key = {
- .format = rts[i]->format,
- .rt = i,
- .nr_samples = rts[i]->image->layout.nr_samples,
- .type = blit_shader->blend_types[i],
- };
-
- pthread_mutex_lock(&dev->blitter.shaders.lock);
- struct hash_entry *he =
- _mesa_hash_table_search(dev->blitter.shaders.blend, &key);
- struct pan_blit_blend_shader_data *blend_shader = he ? he->data : NULL;
- if (blend_shader) {
- blend_shaders[i] = blend_shader->address;
- pthread_mutex_unlock(&dev->blitter.shaders.lock);
- continue;
- }
-
- blend_shader = rzalloc(dev->blitter.shaders.blend,
- struct pan_blit_blend_shader_data);
- blend_shader->key = key;
-
- blend_state.rts[i] = (struct pan_blend_rt_state) {
- .format = rts[i]->format,
- .nr_samples = rts[i]->image->layout.nr_samples,
- .equation = {
- .blend_enable = true,
- .rgb_src_factor = BLEND_FACTOR_ZERO,
- .rgb_invert_src_factor = true,
- .rgb_dst_factor = BLEND_FACTOR_ZERO,
- .rgb_func = BLEND_FUNC_ADD,
- .alpha_src_factor = BLEND_FACTOR_ZERO,
- .alpha_invert_src_factor = true,
- .alpha_dst_factor = BLEND_FACTOR_ZERO,
- .alpha_func = BLEND_FUNC_ADD,
- .color_mask = 0xf,
- },
- };
-
- pthread_mutex_lock(&dev->blend_shaders.lock);
- struct pan_blend_shader_variant *b =
- GENX(pan_blend_get_shader_locked)(dev, &blend_state,
- blit_shader->blend_types[i],
- nir_type_float32, /* unused */
- i);
-
- ASSERTED unsigned full_threads =
- (dev->arch >= 7) ? 32 : ((dev->arch == 6) ? 64 : 4);
- assert(b->work_reg_count <= full_threads);
- struct panfrost_ptr bin =
- pan_pool_alloc_aligned(dev->blitter.shaders.pool,
- b->binary.size,
- PAN_ARCH >= 6 ? 128 : 64);
- memcpy(bin.cpu, b->binary.data, b->binary.size);
-
- blend_shader->address = bin.gpu | b->first_tag;
- pthread_mutex_unlock(&dev->blend_shaders.lock);
- _mesa_hash_table_insert(dev->blitter.shaders.blend,
- &blend_shader->key, blend_shader);
- pthread_mutex_unlock(&dev->blitter.shaders.lock);
- blend_shaders[i] = blend_shader->address;
- }
+ if (!rt_count)
+ return;
+
+ struct pan_blend_state blend_state = {
+ .rt_count = rt_count,
+ };
+
+ for (unsigned i = 0; i < rt_count; i++) {
+ if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal)
+ continue;
+
+ struct pan_blit_blend_shader_key key = {
+ .format = rts[i]->format,
+ .rt = i,
+ .nr_samples = pan_image_view_get_nr_samples(rts[i]),
+ .type = blit_shader->blend_types[i],
+ };
+
+ pthread_mutex_lock(&cache->shaders.lock);
+ struct hash_entry *he =
+ _mesa_hash_table_search(cache->shaders.blend, &key);
+ struct pan_blit_blend_shader_data *blend_shader = he ? he->data : NULL;
+ if (blend_shader) {
+ blend_shaders[i] = blend_shader->address;
+ pthread_mutex_unlock(&cache->shaders.lock);
+ continue;
+ }
+
+ blend_shader =
+ rzalloc(cache->shaders.blend, struct pan_blit_blend_shader_data);
+ blend_shader->key = key;
+
+ blend_state.rts[i] = (struct pan_blend_rt_state){
+ .format = rts[i]->format,
+ .nr_samples = pan_image_view_get_nr_samples(rts[i]),
+ .equation =
+ {
+ .blend_enable = false,
+ .color_mask = 0xf,
+ },
+ };
+
+ pthread_mutex_lock(&cache->blend_shader_cache->lock);
+ struct pan_blend_shader_variant *b = GENX(pan_blend_get_shader_locked)(
+ cache->blend_shader_cache, &blend_state, blit_shader->blend_types[i],
+ nir_type_float32, /* unused */
+ i);
+
+ assert(b->work_reg_count <= 4);
+ struct panfrost_ptr bin =
+ pan_pool_alloc_aligned(cache->shaders.pool, b->binary.size, 64);
+ memcpy(bin.cpu, b->binary.data, b->binary.size);
+
+ blend_shader->address = bin.gpu | b->first_tag;
+ pthread_mutex_unlock(&cache->blend_shader_cache->lock);
+ _mesa_hash_table_insert(cache->shaders.blend, &blend_shader->key,
+ blend_shader);
+ pthread_mutex_unlock(&cache->shaders.lock);
+ blend_shaders[i] = blend_shader->address;
+ }
+}
+#endif
+
+/*
+ * Early Mali GPUs did not respect sampler LOD clamps or bias, so the Midgard
+ * compiler inserts lowering code with a load_sampler_lod_parameters_pan sysval
+ * that we need to lower. Our samplers do not use LOD clamps or bias, so we
+ * lower to the identity settings and let constant folding get rid of the
+ * unnecessary lowering.
+ */
+static bool
+lower_sampler_parameters(nir_builder *b, nir_intrinsic_instr *intr,
+ UNUSED void *data)
+{
+ if (intr->intrinsic != nir_intrinsic_load_sampler_lod_parameters_pan)
+ return false;
+
+ const nir_const_value constants[4] = {
+ nir_const_value_for_float(0.0f, 32), /* min_lod */
+ nir_const_value_for_float(INFINITY, 32), /* max_lod */
+ nir_const_value_for_float(0.0f, 32), /* lod_bias */
+ };
+
+ b->cursor = nir_after_instr(&intr->instr);
+ nir_def_rewrite_uses(&intr->def, nir_build_imm(b, 3, 32, constants));
+ return true;
+}
+
+static uint32_t
+sampler_hw_index(uint32_t index)
+{
+ return PAN_ARCH >= 9 ? pan_res_handle(PAN_BLIT_TABLE_SAMPLER, index) : index;
+}
+
+static uint32_t
+tex_hw_index(uint32_t index)
+{
+ return PAN_ARCH >= 9 ? pan_res_handle(PAN_BLIT_TABLE_TEXTURE, index) : index;
+}
+
+static uint32_t
+attr_hw_index(uint32_t index)
+{
+ return PAN_ARCH >= 9 ? pan_res_handle(PAN_BLIT_TABLE_ATTRIBUTE, index)
+ : index;
}
static const struct pan_blit_shader_data *
-pan_blitter_get_blit_shader(struct panfrost_device *dev,
+pan_blitter_get_blit_shader(struct pan_blitter_cache *cache,
const struct pan_blit_shader_key *key)
{
- pthread_mutex_lock(&dev->blitter.shaders.lock);
- struct hash_entry *he = _mesa_hash_table_search(dev->blitter.shaders.blit, key);
- struct pan_blit_shader_data *shader = he ? he->data : NULL;
-
- if (shader)
- goto out;
-
- unsigned coord_comps = 0;
- unsigned sig_offset = 0;
- char sig[256];
- bool first = true;
- for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
- const char *type_str, *dim_str;
- if (key->surfaces[i].type == nir_type_invalid)
- continue;
-
- switch (key->surfaces[i].type) {
- case nir_type_float32: type_str = "float"; break;
- case nir_type_uint32: type_str = "uint"; break;
- case nir_type_int32: type_str = "int"; break;
- default: unreachable("Invalid type\n");
- }
-
- switch (key->surfaces[i].dim) {
- case MALI_TEXTURE_DIMENSION_CUBE: dim_str = "cube"; break;
- case MALI_TEXTURE_DIMENSION_1D: dim_str = "1D"; break;
- case MALI_TEXTURE_DIMENSION_2D: dim_str = "2D"; break;
- case MALI_TEXTURE_DIMENSION_3D: dim_str = "3D"; break;
- default: unreachable("Invalid dim\n");
- }
-
- coord_comps = MAX2(coord_comps,
- (key->surfaces[i].dim ? : 3) +
- (key->surfaces[i].array ? 1 : 0));
- first = false;
-
- if (sig_offset >= sizeof(sig))
- continue;
-
- sig_offset += snprintf(sig + sig_offset, sizeof(sig) - sig_offset,
- "%s[%s;%s;%s%s;src_samples=%d,dst_samples=%d]",
- first ? "" : ",",
- gl_frag_result_name(key->surfaces[i].loc),
- type_str, dim_str,
- key->surfaces[i].array ? "[]" : "",
- key->surfaces[i].src_samples,
- key->surfaces[i].dst_samples);
- }
-
- nir_builder b =
- nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
- GENX(pan_shader_get_compiler_options)(),
- "pan_blit(%s)", sig);
- b.shader->info.internal = true;
-
- nir_variable *coord_var =
- nir_variable_create(b.shader, nir_var_shader_in,
- glsl_vector_type(GLSL_TYPE_FLOAT, coord_comps),
- "coord");
- coord_var->data.location = VARYING_SLOT_TEX0;
-
- nir_ssa_def *coord = nir_load_var(&b, coord_var);
-
- unsigned active_count = 0;
- for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
- if (key->surfaces[i].type == nir_type_invalid)
- continue;
-
- /* Resolve operations only work for N -> 1 samples. */
- assert(key->surfaces[i].dst_samples == 1 ||
- key->surfaces[i].src_samples == key->surfaces[i].dst_samples);
-
- static const char *out_names[] = {
- "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7",
- };
-
- unsigned ncomps = key->surfaces[i].loc >= FRAG_RESULT_DATA0 ? 4 : 1;
- nir_variable *out =
- nir_variable_create(b.shader, nir_var_shader_out,
- glsl_vector_type(GLSL_TYPE_FLOAT, ncomps),
- out_names[active_count]);
- out->data.location = key->surfaces[i].loc;
- out->data.driver_location = active_count;
-
- bool resolve = key->surfaces[i].src_samples > key->surfaces[i].dst_samples;
- bool ms = key->surfaces[i].src_samples > 1;
- enum glsl_sampler_dim sampler_dim;
-
- switch (key->surfaces[i].dim) {
- case MALI_TEXTURE_DIMENSION_1D:
- sampler_dim = GLSL_SAMPLER_DIM_1D;
- break;
- case MALI_TEXTURE_DIMENSION_2D:
- sampler_dim = ms ?
- GLSL_SAMPLER_DIM_MS :
- GLSL_SAMPLER_DIM_2D;
- break;
- case MALI_TEXTURE_DIMENSION_3D:
- sampler_dim = GLSL_SAMPLER_DIM_3D;
- break;
- case MALI_TEXTURE_DIMENSION_CUBE:
- sampler_dim = GLSL_SAMPLER_DIM_CUBE;
- break;
- }
-
- nir_ssa_def *res = NULL;
-
- if (resolve) {
- /* When resolving a float type, we need to calculate
- * the average of all samples. For integer resolve, GL
- * and Vulkan say that one sample should be chosen
- * without telling which. Let's just pick the first one
- * in that case.
- */
- nir_alu_type base_type =
- nir_alu_type_get_base_type(key->surfaces[i].type);
- unsigned nsamples = base_type == nir_type_float ?
- key->surfaces[i].src_samples : 1;
-
- for (unsigned s = 0; s < nsamples; s++) {
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
-
- tex->op = nir_texop_txf_ms;
- tex->dest_type = key->surfaces[i].type;
- tex->texture_index = active_count;
- tex->is_array = key->surfaces[i].array;
- tex->sampler_dim = sampler_dim;
-
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
- tex->coord_components = coord_comps;
-
- tex->src[1].src_type = nir_tex_src_ms_index;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s));
-
- tex->src[2].src_type = nir_tex_src_lod;
- tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
- nir_builder_instr_insert(&b, &tex->instr);
-
- res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa;
- }
-
- if (base_type == nir_type_float) {
- unsigned type_sz =
- nir_alu_type_get_type_size(key->surfaces[i].type);
- res = nir_fmul(&b, res,
- nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz));
- }
- } else {
- nir_tex_instr *tex =
- nir_tex_instr_create(b.shader, ms ? 3 : 1);
-
- tex->dest_type = key->surfaces[i].type;
- tex->texture_index = active_count;
- tex->is_array = key->surfaces[i].array;
- tex->sampler_dim = sampler_dim;
-
- if (ms) {
- tex->op = nir_texop_txf_ms;
-
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
- tex->coord_components = coord_comps;
-
- tex->src[1].src_type = nir_tex_src_ms_index;
- tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));
-
- tex->src[2].src_type = nir_tex_src_lod;
- tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- } else {
- tex->op = nir_texop_tex;
-
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(coord);
- tex->coord_components = coord_comps;
- }
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
- nir_builder_instr_insert(&b, &tex->instr);
- res = &tex->dest.ssa;
- }
-
- assert(res);
-
- if (key->surfaces[i].loc >= FRAG_RESULT_DATA0) {
- nir_store_var(&b, out, res, 0xFF);
- } else {
- unsigned c = key->surfaces[i].loc == FRAG_RESULT_STENCIL ? 1 : 0;
- nir_store_var(&b, out, nir_channel(&b, res, c), 0xFF);
- }
- active_count++;
- }
-
- struct panfrost_compile_inputs inputs = {
- .gpu_id = dev->gpu_id,
- .is_blit = true,
- };
- struct util_dynarray binary;
- struct pan_shader_info info;
-
- util_dynarray_init(&binary, NULL);
-
- GENX(pan_shader_compile)(b.shader, &inputs, &binary, &info);
-
- shader = rzalloc(dev->blitter.shaders.blit,
- struct pan_blit_shader_data);
- shader->key = *key;
- shader->address =
- pan_pool_upload_aligned(dev->blitter.shaders.pool,
- binary.data, binary.size,
- PAN_ARCH >= 6 ? 128 : 64);
-
- util_dynarray_fini(&binary);
- ralloc_free(b.shader);
+ pthread_mutex_lock(&cache->shaders.lock);
+ struct hash_entry *he =
+ _mesa_hash_table_search(cache->shaders.blit, key);
+ struct pan_blit_shader_data *shader = he ? he->data : NULL;
+
+ if (shader)
+ goto out;
+
+ unsigned coord_comps = 0;
+ unsigned sig_offset = 0;
+ char sig[256];
+ bool first = true;
+ for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
+ const char *type_str, *dim_str;
+ if (key->surfaces[i].type == nir_type_invalid)
+ continue;
+
+ switch (key->surfaces[i].type) {
+ case nir_type_float32:
+ type_str = "float";
+ break;
+ case nir_type_uint32:
+ type_str = "uint";
+ break;
+ case nir_type_int32:
+ type_str = "int";
+ break;
+ default:
+ unreachable("Invalid type\n");
+ }
+
+ switch (key->surfaces[i].dim) {
+ case MALI_TEXTURE_DIMENSION_CUBE:
+ dim_str = "cube";
+ break;
+ case MALI_TEXTURE_DIMENSION_1D:
+ dim_str = "1D";
+ break;
+ case MALI_TEXTURE_DIMENSION_2D:
+ dim_str = "2D";
+ break;
+ case MALI_TEXTURE_DIMENSION_3D:
+ dim_str = "3D";
+ break;
+ default:
+ unreachable("Invalid dim\n");
+ }
+
+ coord_comps = MAX2(coord_comps, (key->surfaces[i].dim ?: 3) +
+ (key->surfaces[i].array ? 1 : 0));
+
+      if (sig_offset >= sizeof(sig))
+         continue;
+
+      sig_offset +=
+         snprintf(sig + sig_offset, sizeof(sig) - sig_offset,
+                  "%s[%s;%s;%s%s;src_samples=%d,dst_samples=%d]",
+                  first ? "" : ",", gl_frag_result_name(key->surfaces[i].loc),
+                  type_str, dim_str, key->surfaces[i].array ? "[]" : "",
+                  key->surfaces[i].src_samples, key->surfaces[i].dst_samples);
+      /* Only the first printed entry omits the ',' separator. */
+      first = false;
+ }
+
+ nir_builder b = nir_builder_init_simple_shader(
+ MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(),
+ "pan_blit(%s)", sig);
+
+ nir_def *barycentric = nir_load_barycentric(
+ &b, nir_intrinsic_load_barycentric_pixel, INTERP_MODE_SMOOTH);
+ nir_def *coord = nir_load_interpolated_input(
+ &b, coord_comps, 32, barycentric, nir_imm_int(&b, 0),
+ .base = attr_hw_index(0), .dest_type = nir_type_float32,
+ .io_semantics.location = VARYING_SLOT_VAR0, .io_semantics.num_slots = 1);
+
+ unsigned active_count = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
+ if (key->surfaces[i].type == nir_type_invalid)
+ continue;
+
+ /* Resolve operations only work for N -> 1 samples. */
+ assert(key->surfaces[i].dst_samples == 1 ||
+ key->surfaces[i].src_samples == key->surfaces[i].dst_samples);
+
+ bool resolve =
+ key->surfaces[i].src_samples > key->surfaces[i].dst_samples;
+ bool ms = key->surfaces[i].src_samples > 1;
+ enum glsl_sampler_dim sampler_dim;
+
+ switch (key->surfaces[i].dim) {
+ case MALI_TEXTURE_DIMENSION_1D:
+ sampler_dim = GLSL_SAMPLER_DIM_1D;
+ break;
+ case MALI_TEXTURE_DIMENSION_2D:
+ sampler_dim = ms ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
+ break;
+ case MALI_TEXTURE_DIMENSION_3D:
+ sampler_dim = GLSL_SAMPLER_DIM_3D;
+ break;
+ case MALI_TEXTURE_DIMENSION_CUBE:
+ sampler_dim = GLSL_SAMPLER_DIM_CUBE;
+ break;
+ }
+
+ nir_def *res = NULL;
+
+ if (resolve) {
+ /* When resolving a float type, we need to calculate
+ * the average of all samples. For integer resolve, GL
+          * and Vulkan say that one sample should be chosen,
+          * without specifying which one; we simply pick the first
+          * sample in that case (see the scalar sketch after this
+          * function).
+ */
+ nir_alu_type base_type =
+ nir_alu_type_get_base_type(key->surfaces[i].type);
+ unsigned nsamples =
+ base_type == nir_type_float ? key->surfaces[i].src_samples : 1;
+
+ for (unsigned s = 0; s < nsamples; s++) {
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+
+ tex->op = nir_texop_txf_ms;
+ tex->dest_type = key->surfaces[i].type;
+ tex->texture_index = tex_hw_index(active_count);
+ tex->sampler_index = sampler_hw_index(0);
+ tex->is_array = key->surfaces[i].array;
+ tex->sampler_dim = sampler_dim;
+
+ tex->src[0] =
+ nir_tex_src_for_ssa(nir_tex_src_coord, nir_f2i32(&b, coord));
+ tex->coord_components = coord_comps;
+
+ tex->src[1] =
+ nir_tex_src_for_ssa(nir_tex_src_ms_index, nir_imm_int(&b, s));
+
+ tex->src[2] =
+ nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(&b, 0));
+ nir_def_init(&tex->instr, &tex->def, 4, 32);
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ res = res ? nir_fadd(&b, res, &tex->def) : &tex->def;
+ }
+
+ if (base_type == nir_type_float)
+ res = nir_fmul_imm(&b, res, 1.0f / nsamples);
+ } else {
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, ms ? 3 : 1);
+
+ tex->dest_type = key->surfaces[i].type;
+ tex->texture_index = tex_hw_index(active_count);
+ tex->sampler_index = sampler_hw_index(0);
+ tex->is_array = key->surfaces[i].array;
+ tex->sampler_dim = sampler_dim;
+
+ if (ms) {
+ tex->op = nir_texop_txf_ms;
+
+ tex->src[0] =
+ nir_tex_src_for_ssa(nir_tex_src_coord, nir_f2i32(&b, coord));
+ tex->coord_components = coord_comps;
+
+ tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
+ nir_load_sample_id(&b));
+
+ tex->src[2] =
+ nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(&b, 0));
+ } else {
+ tex->op = nir_texop_txl;
+
+ tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord);
+ tex->coord_components = coord_comps;
+ }
+
+ nir_def_init(&tex->instr, &tex->def, 4, 32);
+ nir_builder_instr_insert(&b, &tex->instr);
+ res = &tex->def;
+ }
+
+ assert(res);
+
+ if (key->surfaces[i].loc >= FRAG_RESULT_DATA0) {
+ nir_store_output(
+ &b, res, nir_imm_int(&b, 0), .base = active_count,
+ .src_type = key->surfaces[i].type,
+ .io_semantics.location = key->surfaces[i].loc,
+ .io_semantics.num_slots = 1,
+ .write_mask = nir_component_mask(res->num_components));
+ } else {
+ unsigned c = key->surfaces[i].loc == FRAG_RESULT_STENCIL ? 1 : 0;
+ nir_store_output(
+ &b, nir_channel(&b, res, c), nir_imm_int(&b, 0),
+ .base = active_count, .src_type = key->surfaces[i].type,
+ .io_semantics.location = key->surfaces[i].loc,
+ .io_semantics.num_slots = 1, .write_mask = nir_component_mask(1));
+ }
+ active_count++;
+ }
+
+ struct panfrost_compile_inputs inputs = {
+ .gpu_id = cache->gpu_id,
+ .is_blit = true,
+ .no_idvs = true,
+ };
+ struct util_dynarray binary;
+
+ util_dynarray_init(&binary, NULL);
+
+ shader = rzalloc(cache->shaders.blit, struct pan_blit_shader_data);
+
+ nir_shader_gather_info(b.shader, nir_shader_get_entrypoint(b.shader));
+
+ for (unsigned i = 0; i < active_count; ++i)
+ BITSET_SET(b.shader->info.textures_used, i);
+
+ pan_shader_preprocess(b.shader, inputs.gpu_id);
+
+ if (PAN_ARCH == 4) {
+ NIR_PASS_V(b.shader, nir_shader_intrinsics_pass, lower_sampler_parameters,
+ nir_metadata_block_index | nir_metadata_dominance, NULL);
+ }
+
+ GENX(pan_shader_compile)(b.shader, &inputs, &binary, &shader->info);
+
+ shader->key = *key;
+ shader->address =
+ pan_pool_upload_aligned(cache->shaders.pool, binary.data,
+ binary.size, PAN_ARCH >= 6 ? 128 : 64);
+
+ util_dynarray_fini(&binary);
+ ralloc_free(b.shader);
-#if PAN_ARCH <= 5
- shader->address |= info.midgard.first_tag;
-#else
- for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) {
- shader->blend_ret_offsets[i] = info.bifrost.blend[i].return_offset;
- shader->blend_types[i] = info.bifrost.blend[i].type;
- }
+#if PAN_ARCH >= 6
+ for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) {
+ shader->blend_ret_offsets[i] =
+ shader->info.bifrost.blend[i].return_offset;
+ shader->blend_types[i] = shader->info.bifrost.blend[i].type;
+ }
#endif
- _mesa_hash_table_insert(dev->blitter.shaders.blit, &shader->key, shader);
+ _mesa_hash_table_insert(cache->shaders.blit, &shader->key, shader);
out:
- pthread_mutex_unlock(&dev->blitter.shaders.lock);
- return shader;
+ pthread_mutex_unlock(&cache->shaders.lock);
+ return shader;
}
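
The float-resolve path above computes a plain arithmetic mean: a chain of nir_fadd over all samples followed by a single nir_fmul_imm by 1/n. A scalar C sketch of the same computation, with fetch_sample() as a hypothetical stand-in for the per-sample txf_ms fetch:

   /* Average nsamples values the way the resolve loop does: accumulate
    * with adds, then scale once by 1/n. fetch_sample() is a made-up
    * placeholder for the multisample texel fetch. */
   static float
   resolve_average(unsigned nsamples, float (*fetch_sample)(unsigned s))
   {
      float sum = 0.0f;
      for (unsigned s = 0; s < nsamples; s++)
         sum += fetch_sample(s);
      return sum * (1.0f / nsamples);
   }
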
-static mali_ptr
-pan_blitter_get_rsd(struct panfrost_device *dev,
- unsigned rt_count,
- const struct pan_image_view **src_rts,
- const struct pan_image_view **dst_rts,
- const struct pan_image_view *src_z,
- const struct pan_image_view *dst_z,
- const struct pan_image_view *src_s,
- const struct pan_image_view *dst_s)
+static struct pan_blit_shader_key
+pan_blitter_get_key(struct pan_blitter_views *views)
{
- struct pan_blit_rsd_key rsd_key = { 0 };
-
- assert(!rt_count || (!src_z && !src_s));
-
- struct pan_blit_shader_key blit_key = { 0 };
-
- if (src_z) {
- assert(dst_z);
- rsd_key.z.format = dst_z->format;
- blit_key.surfaces[0].loc = FRAG_RESULT_DEPTH;
- rsd_key.z.type = blit_key.surfaces[0].type = nir_type_float32;
- rsd_key.z.src_samples = blit_key.surfaces[0].src_samples = src_z->image->layout.nr_samples;
- rsd_key.z.dst_samples = blit_key.surfaces[0].dst_samples = dst_z->image->layout.nr_samples;
- rsd_key.z.dim = blit_key.surfaces[0].dim = src_z->dim;
- rsd_key.z.array = blit_key.surfaces[0].array = src_z->first_layer != src_z->last_layer;
- }
-
- if (src_s) {
- assert(dst_s);
- rsd_key.s.format = dst_s->format;
- blit_key.surfaces[1].loc = FRAG_RESULT_STENCIL;
- rsd_key.s.type = blit_key.surfaces[1].type = nir_type_uint32;
- rsd_key.s.src_samples = blit_key.surfaces[1].src_samples = src_s->image->layout.nr_samples;
- rsd_key.s.dst_samples = blit_key.surfaces[1].dst_samples = dst_s->image->layout.nr_samples;
- rsd_key.s.dim = blit_key.surfaces[1].dim = src_s->dim;
- rsd_key.s.array = blit_key.surfaces[1].array = src_s->first_layer != src_s->last_layer;
- }
-
- for (unsigned i = 0; i < rt_count; i++) {
- if (!src_rts[i])
- continue;
-
- assert(dst_rts[i]);
- rsd_key.rts[i].format = dst_rts[i]->format;
- blit_key.surfaces[i].loc = FRAG_RESULT_DATA0 + i;
- rsd_key.rts[i].type = blit_key.surfaces[i].type =
- util_format_is_pure_uint(src_rts[i]->format) ? nir_type_uint32 :
- util_format_is_pure_sint(src_rts[i]->format) ? nir_type_int32 :
- nir_type_float32;
- rsd_key.rts[i].src_samples = blit_key.surfaces[i].src_samples = src_rts[i]->image->layout.nr_samples;
- rsd_key.rts[i].dst_samples = blit_key.surfaces[i].dst_samples = dst_rts[i]->image->layout.nr_samples;
- rsd_key.rts[i].dim = blit_key.surfaces[i].dim = src_rts[i]->dim;
- rsd_key.rts[i].array = blit_key.surfaces[i].array = src_rts[i]->first_layer != src_rts[i]->last_layer;
- }
-
- pthread_mutex_lock(&dev->blitter.rsds.lock);
- struct hash_entry *he =
- _mesa_hash_table_search(dev->blitter.rsds.rsds, &rsd_key);
- struct pan_blit_rsd_data *rsd = he ? he->data : NULL;
- if (rsd)
- goto out;
-
- rsd = rzalloc(dev->blitter.rsds.rsds, struct pan_blit_rsd_data);
- rsd->key = rsd_key;
-
- unsigned bd_count = PAN_ARCH >= 5 ? MAX2(rt_count, 1) : 0;
- struct panfrost_ptr rsd_ptr =
- pan_pool_alloc_desc_aggregate(dev->blitter.rsds.pool,
- PAN_DESC(RENDERER_STATE),
- PAN_DESC_ARRAY(bd_count, BLEND));
-
- mali_ptr blend_shaders[8] = { 0 };
-
- const struct pan_blit_shader_data *blit_shader =
- pan_blitter_get_blit_shader(dev, &blit_key);
-
- pan_blitter_get_blend_shaders(dev, rt_count, dst_rts,
- blit_shader, blend_shaders);
-
- pan_blitter_emit_rsd(dev, blit_shader,
- MAX2(rt_count, 1), dst_rts, blend_shaders,
- dst_z, dst_s, rsd_ptr.cpu);
- rsd->address = rsd_ptr.gpu;
- _mesa_hash_table_insert(dev->blitter.rsds.rsds, &rsd->key, rsd);
-
-out:
- pthread_mutex_unlock(&dev->blitter.rsds.lock);
- return rsd->address;
+ struct pan_blit_shader_key key = {0};
+
+ if (views->src_z) {
+ assert(views->dst_z);
+ key.surfaces[0].loc = FRAG_RESULT_DEPTH;
+ key.surfaces[0].type = nir_type_float32;
+ key.surfaces[0].src_samples = pan_image_view_get_nr_samples(views->src_z);
+ key.surfaces[0].dst_samples = pan_image_view_get_nr_samples(views->dst_z);
+ key.surfaces[0].dim = views->src_z->dim;
+ key.surfaces[0].array =
+ views->src_z->first_layer != views->src_z->last_layer;
+ }
+
+ if (views->src_s) {
+ assert(views->dst_s);
+ key.surfaces[1].loc = FRAG_RESULT_STENCIL;
+ key.surfaces[1].type = nir_type_uint32;
+ key.surfaces[1].src_samples = pan_image_view_get_nr_samples(views->src_s);
+ key.surfaces[1].dst_samples = pan_image_view_get_nr_samples(views->dst_s);
+ key.surfaces[1].dim = views->src_s->dim;
+ key.surfaces[1].array =
+ views->src_s->first_layer != views->src_s->last_layer;
+ }
+
+ for (unsigned i = 0; i < views->rt_count; i++) {
+ if (!views->src_rts[i])
+ continue;
+
+ assert(views->dst_rts[i]);
+ key.surfaces[i].loc = FRAG_RESULT_DATA0 + i;
+ key.surfaces[i].type =
+ util_format_is_pure_uint(views->src_rts[i]->format) ? nir_type_uint32
+ : util_format_is_pure_sint(views->src_rts[i]->format)
+ ? nir_type_int32
+ : nir_type_float32;
+ key.surfaces[i].src_samples =
+ pan_image_view_get_nr_samples(views->src_rts[i]);
+ key.surfaces[i].dst_samples =
+ pan_image_view_get_nr_samples(views->dst_rts[i]);
+ key.surfaces[i].dim = views->src_rts[i]->dim;
+ key.surfaces[i].array =
+ views->src_rts[i]->first_layer != views->src_rts[i]->last_layer;
+ }
+
+ return key;
}
+#if PAN_ARCH <= 7
static mali_ptr
-pan_preload_get_rsd(struct panfrost_device *dev,
- const struct pan_fb_info *fb,
- bool zs)
+pan_blitter_get_rsd(struct pan_blitter_cache *cache,
+ struct pan_blitter_views *views)
{
- const struct pan_image_view *rts[8] = { NULL };
- const struct pan_image_view *z = NULL, *s = NULL;
- struct pan_image_view patched_s_view;
- unsigned rt_count = 0;
-
- if (zs) {
- if (fb->zs.preload.z)
- z = fb->zs.view.zs;
-
- if (fb->zs.preload.s) {
- const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs;
- enum pipe_format fmt = util_format_get_depth_only(view->format);
-
- switch (view->format) {
- case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break;
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break;
- default: fmt = view->format; break;
- }
-
- if (fmt != view->format) {
- patched_s_view = *view;
- patched_s_view.format = fmt;
- s = &patched_s_view;
- } else {
- s = view;
- }
- }
- } else {
- for (unsigned i = 0; i < fb->rt_count; i++) {
- if (fb->rts[i].preload)
- rts[i] = fb->rts[i].view;
- }
-
- rt_count = fb->rt_count;
- }
-
- return pan_blitter_get_rsd(dev, rt_count, rts, rts, z, z, s, s);
+ struct pan_blit_rsd_key rsd_key = {0};
+
+ assert(!views->rt_count || (!views->src_z && !views->src_s));
+
+ struct pan_blit_shader_key blit_key = pan_blitter_get_key(views);
+
+ if (views->src_z) {
+ assert(views->dst_z);
+ rsd_key.z.format = views->dst_z->format;
+ rsd_key.z.type = blit_key.surfaces[0].type;
+ rsd_key.z.src_samples = blit_key.surfaces[0].src_samples;
+ rsd_key.z.dst_samples = blit_key.surfaces[0].dst_samples;
+ rsd_key.z.dim = blit_key.surfaces[0].dim;
+ rsd_key.z.array = blit_key.surfaces[0].array;
+ }
+
+ if (views->src_s) {
+ assert(views->dst_s);
+ rsd_key.s.format = views->dst_s->format;
+ rsd_key.s.type = blit_key.surfaces[1].type;
+ rsd_key.s.src_samples = blit_key.surfaces[1].src_samples;
+ rsd_key.s.dst_samples = blit_key.surfaces[1].dst_samples;
+ rsd_key.s.dim = blit_key.surfaces[1].dim;
+ rsd_key.s.array = blit_key.surfaces[1].array;
+ }
+
+ for (unsigned i = 0; i < views->rt_count; i++) {
+ if (!views->src_rts[i])
+ continue;
+
+ assert(views->dst_rts[i]);
+ rsd_key.rts[i].format = views->dst_rts[i]->format;
+ rsd_key.rts[i].type = blit_key.surfaces[i].type;
+ rsd_key.rts[i].src_samples = blit_key.surfaces[i].src_samples;
+ rsd_key.rts[i].dst_samples = blit_key.surfaces[i].dst_samples;
+ rsd_key.rts[i].dim = blit_key.surfaces[i].dim;
+ rsd_key.rts[i].array = blit_key.surfaces[i].array;
+ }
+
+ pthread_mutex_lock(&cache->rsds.lock);
+ struct hash_entry *he =
+ _mesa_hash_table_search(cache->rsds.rsds, &rsd_key);
+ struct pan_blit_rsd_data *rsd = he ? he->data : NULL;
+ if (rsd)
+ goto out;
+
+ rsd = rzalloc(cache->rsds.rsds, struct pan_blit_rsd_data);
+ rsd->key = rsd_key;
+
+#if PAN_ARCH == 4
+ struct panfrost_ptr rsd_ptr =
+ pan_pool_alloc_desc(cache->rsds.pool, RENDERER_STATE);
+#else
+ unsigned bd_count = PAN_ARCH >= 5 ? MAX2(views->rt_count, 1) : 0;
+ struct panfrost_ptr rsd_ptr = pan_pool_alloc_desc_aggregate(
+ cache->rsds.pool, PAN_DESC(RENDERER_STATE),
+ PAN_DESC_ARRAY(bd_count, BLEND));
+#endif
+
+ mali_ptr blend_shaders[8] = {0};
+
+ const struct pan_blit_shader_data *blit_shader =
+ pan_blitter_get_blit_shader(cache, &blit_key);
+
+#if PAN_ARCH <= 5
+ pan_blitter_get_blend_shaders(cache,
+ views->rt_count, views->dst_rts, blit_shader,
+ blend_shaders);
+#endif
+
+ pan_blitter_emit_rsd(blit_shader, views, blend_shaders, rsd_ptr.cpu);
+ rsd->address = rsd_ptr.gpu;
+ _mesa_hash_table_insert(cache->rsds.rsds, &rsd->key, rsd);
+
+out:
+ pthread_mutex_unlock(&cache->rsds.lock);
+ return rsd->address;
}
static mali_ptr
-pan_blit_get_rsd(struct panfrost_device *dev,
+pan_blit_get_rsd(struct pan_blitter_cache *cache,
const struct pan_image_view *src_views,
const struct pan_image_view *dst_view)
{
- const struct util_format_description *desc =
- util_format_description(src_views[0].format);
- const struct pan_image_view *src_rt = NULL, *dst_rt = NULL;
- const struct pan_image_view *src_z = NULL, *dst_z = NULL;
- const struct pan_image_view *src_s = NULL, *dst_s = NULL;
-
- if (util_format_has_depth(desc)) {
- src_z = &src_views[0];
- dst_z = dst_view;
- }
-
- if (src_views[1].format) {
- src_s = &src_views[1];
- dst_s = dst_view;
- } else if (util_format_has_stencil(desc)) {
- src_s = &src_views[0];
- dst_s = dst_view;
- }
-
- if (!src_z && !src_s) {
- src_rt = &src_views[0];
- dst_rt = dst_view;
- }
-
- return pan_blitter_get_rsd(dev, src_rt ? 1 : 0, &src_rt, &dst_rt,
- src_z, dst_z, src_s, dst_s);
+ const struct util_format_description *desc =
+ util_format_description(src_views[0].format);
+
+ struct pan_blitter_views views = {};
+
+ if (util_format_has_depth(desc)) {
+ views.src_z = &src_views[0];
+ views.dst_z = dst_view;
+ }
+
+ if (src_views[1].format) {
+ views.src_s = &src_views[1];
+ views.dst_s = dst_view;
+ } else if (util_format_has_stencil(desc)) {
+ views.src_s = &src_views[0];
+ views.dst_s = dst_view;
+ }
+
+ if (!views.src_z && !views.src_s) {
+ views.rt_count = 1;
+ views.src_rts[0] = src_views;
+ views.dst_rts[0] = dst_view;
+ }
+
+ return pan_blitter_get_rsd(cache, &views);
+}
+#endif
+
+static struct pan_blitter_views
+pan_preload_get_views(const struct pan_fb_info *fb, bool zs,
+ struct pan_image_view *patched_s)
+{
+ struct pan_blitter_views views = {0};
+
+ if (zs) {
+ if (fb->zs.preload.z)
+ views.src_z = views.dst_z = fb->zs.view.zs;
+
+ if (fb->zs.preload.s) {
+ const struct pan_image_view *view = fb->zs.view.s ?: fb->zs.view.zs;
+ enum pipe_format fmt = util_format_get_depth_only(view->format);
+
+ switch (view->format) {
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ fmt = PIPE_FORMAT_X24S8_UINT;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ fmt = PIPE_FORMAT_X32_S8X24_UINT;
+ break;
+ default:
+ fmt = view->format;
+ break;
+ }
+
+ if (fmt != view->format) {
+ *patched_s = *view;
+ patched_s->format = fmt;
+ views.src_s = views.dst_s = patched_s;
+ } else {
+ views.src_s = views.dst_s = view;
+ }
+ }
+ } else {
+ for (unsigned i = 0; i < fb->rt_count; i++) {
+ if (fb->rts[i].preload) {
+ views.src_rts[i] = fb->rts[i].view;
+ views.dst_rts[i] = fb->rts[i].view;
+ }
+ }
+
+ views.rt_count = fb->rt_count;
+ }
+
+ return views;
}
static bool
pan_preload_needed(const struct pan_fb_info *fb, bool zs)
{
- if (zs) {
- if (fb->zs.preload.z || fb->zs.preload.s)
- return true;
- } else {
- for (unsigned i = 0; i < fb->rt_count; i++) {
- if (fb->rts[i].preload)
- return true;
- }
- }
-
- return false;
+ if (zs) {
+ if (fb->zs.preload.z || fb->zs.preload.s)
+ return true;
+ } else {
+ for (unsigned i = 0; i < fb->rt_count; i++) {
+ if (fb->rts[i].preload)
+ return true;
+ }
+ }
+
+ return false;
}
-static void
-pan_blitter_emit_varying(struct pan_pool *pool,
- mali_ptr coordinates,
- struct MALI_DRAW *draw)
+static mali_ptr
+pan_blitter_emit_varying(struct pan_pool *pool)
{
- /* Bifrost needs an empty desc to mark end of prefetching */
- bool padding_buffer = PAN_ARCH >= 6;
-
- struct panfrost_ptr varying =
- pan_pool_alloc_desc(pool, ATTRIBUTE);
- struct panfrost_ptr varying_buffer =
- pan_pool_alloc_desc_array(pool, (padding_buffer ? 2 : 1),
- ATTRIBUTE_BUFFER);
-
- pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
- cfg.pointer = coordinates;
- cfg.stride = 4 * sizeof(float);
- cfg.size = cfg.stride * 4;
- }
-
- if (padding_buffer) {
- pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
- ATTRIBUTE_BUFFER, cfg);
- }
-
- pan_pack(varying.cpu, ATTRIBUTE, cfg) {
- cfg.buffer_index = 0;
- cfg.offset_enable = PAN_ARCH <= 5;
- cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw;
- }
-
- draw->varyings = varying.gpu;
- draw->varying_buffers = varying_buffer.gpu;
+ struct panfrost_ptr varying = pan_pool_alloc_desc(pool, ATTRIBUTE);
+
+ pan_pack(varying.cpu, ATTRIBUTE, cfg) {
+ cfg.buffer_index = 0;
+ cfg.offset_enable = PAN_ARCH <= 5;
+ cfg.format =
+ GENX(panfrost_format_from_pipe_format)(PIPE_FORMAT_R32G32B32_FLOAT)->hw;
+
+#if PAN_ARCH >= 9
+ cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D;
+ cfg.table = PAN_BLIT_TABLE_ATTRIBUTE_BUFFER;
+ cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX;
+ cfg.stride = 4 * sizeof(float);
+#endif
+ }
+
+ return varying.gpu;
}
static mali_ptr
-pan_blitter_emit_sampler(struct pan_pool *pool,
- bool nearest_filter)
+pan_blitter_emit_varying_buffer(struct pan_pool *pool, mali_ptr coordinates)
{
- struct panfrost_ptr sampler =
- pan_pool_alloc_desc(pool, SAMPLER);
+#if PAN_ARCH >= 9
+ struct panfrost_ptr varying_buffer = pan_pool_alloc_desc(pool, BUFFER);
- pan_pack(sampler.cpu, SAMPLER, cfg) {
- cfg.seamless_cube_map = false;
- cfg.normalized_coordinates = false;
- cfg.minify_nearest = nearest_filter;
- cfg.magnify_nearest = nearest_filter;
- }
+ pan_pack(varying_buffer.cpu, BUFFER, cfg) {
+ cfg.address = coordinates;
+ cfg.size = 4 * sizeof(float) * 4;
+ }
+#else
+ /* Bifrost needs an empty desc to mark end of prefetching */
+ bool padding_buffer = PAN_ARCH >= 6;
+
+ struct panfrost_ptr varying_buffer = pan_pool_alloc_desc_array(
+ pool, (padding_buffer ? 2 : 1), ATTRIBUTE_BUFFER);
+
+ pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
+ cfg.pointer = coordinates;
+ cfg.stride = 4 * sizeof(float);
+ cfg.size = cfg.stride * 4;
+ }
+
+ if (padding_buffer) {
+ pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
+ ATTRIBUTE_BUFFER, cfg)
+ ;
+ }
+#endif
- return sampler.gpu;
+ return varying_buffer.gpu;
}
static mali_ptr
-pan_blitter_emit_textures(struct pan_pool *pool,
- unsigned tex_count,
+pan_blitter_emit_sampler(struct pan_pool *pool, bool nearest_filter)
+{
+ struct panfrost_ptr sampler = pan_pool_alloc_desc(pool, SAMPLER);
+
+ pan_pack(sampler.cpu, SAMPLER, cfg) {
+ cfg.seamless_cube_map = false;
+ cfg.normalized_coordinates = false;
+ cfg.minify_nearest = nearest_filter;
+ cfg.magnify_nearest = nearest_filter;
+ }
+
+ return sampler.gpu;
+}
+
+static mali_ptr
+pan_blitter_emit_textures(struct pan_pool *pool, unsigned tex_count,
const struct pan_image_view **views)
{
#if PAN_ARCH >= 6
- struct panfrost_ptr textures =
- pan_pool_alloc_desc_array(pool, tex_count, TEXTURE);
+ struct panfrost_ptr textures =
+ pan_pool_alloc_desc_array(pool, tex_count, TEXTURE);
- for (unsigned i = 0; i < tex_count; i++) {
- void *texture = textures.cpu + (pan_size(TEXTURE) * i);
- size_t payload_size =
- GENX(panfrost_estimate_texture_payload_size)(views[i]);
- struct panfrost_ptr surfaces =
- pan_pool_alloc_aligned(pool, payload_size,
- pan_alignment(SURFACE_WITH_STRIDE));
+ for (unsigned i = 0; i < tex_count; i++) {
+ void *texture = textures.cpu + (pan_size(TEXTURE) * i);
+ size_t payload_size =
+ GENX(panfrost_estimate_texture_payload_size)(views[i]);
+ struct panfrost_ptr surfaces =
+ pan_pool_alloc_aligned(pool, payload_size, 64);
- GENX(panfrost_new_texture)(pool->dev, views[i], texture, &surfaces);
- }
+ GENX(panfrost_new_texture)(views[i], texture, &surfaces);
+ }
- return textures.gpu;
+ return textures.gpu;
#else
- mali_ptr textures[8] = { 0 };
-
- for (unsigned i = 0; i < tex_count; i++) {
- size_t sz = pan_size(TEXTURE) +
- GENX(panfrost_estimate_texture_payload_size)(views[i]);
- struct panfrost_ptr texture =
- pan_pool_alloc_aligned(pool, sz, pan_alignment(TEXTURE));
- struct panfrost_ptr surfaces = {
- .cpu = texture.cpu + pan_size(TEXTURE),
- .gpu = texture.gpu + pan_size(TEXTURE),
- };
-
- GENX(panfrost_new_texture)(pool->dev, views[i], texture.cpu, &surfaces);
- textures[i] = texture.gpu;
- }
-
- return pan_pool_upload_aligned(pool, textures,
- tex_count * sizeof(mali_ptr),
- sizeof(mali_ptr));
+ mali_ptr textures[8] = {0};
+
+ for (unsigned i = 0; i < tex_count; i++) {
+ size_t sz = pan_size(TEXTURE) +
+ GENX(panfrost_estimate_texture_payload_size)(views[i]);
+ struct panfrost_ptr texture =
+ pan_pool_alloc_aligned(pool, sz, pan_alignment(TEXTURE));
+ struct panfrost_ptr surfaces = {
+ .cpu = texture.cpu + pan_size(TEXTURE),
+ .gpu = texture.gpu + pan_size(TEXTURE),
+ };
+
+ GENX(panfrost_new_texture)(views[i], texture.cpu, &surfaces);
+ textures[i] = texture.gpu;
+ }
+
+ return pan_pool_upload_aligned(pool, textures, tex_count * sizeof(mali_ptr),
+ sizeof(mali_ptr));
#endif
}
-static void
-pan_preload_emit_textures(struct pan_pool *pool,
- const struct pan_fb_info *fb, bool zs,
- struct MALI_DRAW *draw)
+static mali_ptr
+pan_preload_emit_textures(struct pan_pool *pool, const struct pan_fb_info *fb,
+ bool zs, unsigned *tex_count_out)
{
- const struct pan_image_view *views[8];
- struct pan_image_view patched_s_view;
- unsigned tex_count = 0;
-
- if (zs) {
- if (fb->zs.preload.z)
- views[tex_count++] = fb->zs.view.zs;
-
- if (fb->zs.preload.s) {
- const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs;
- enum pipe_format fmt = util_format_get_depth_only(view->format);
-
- switch (view->format) {
- case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break;
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break;
- default: fmt = view->format; break;
- }
-
- if (fmt != view->format) {
- patched_s_view = *view;
- patched_s_view.format = fmt;
- view = &patched_s_view;
- }
- views[tex_count++] = view;
- }
- } else {
- for (unsigned i = 0; i < fb->rt_count; i++) {
- if (fb->rts[i].preload)
- views[tex_count++] = fb->rts[i].view;
- }
-
- }
-
- draw->textures = pan_blitter_emit_textures(pool, tex_count, views);
+ const struct pan_image_view *views[8];
+ struct pan_image_view patched_s_view;
+ unsigned tex_count = 0;
+
+ if (zs) {
+ if (fb->zs.preload.z)
+ views[tex_count++] = fb->zs.view.zs;
+
+ if (fb->zs.preload.s) {
+ const struct pan_image_view *view = fb->zs.view.s ?: fb->zs.view.zs;
+ enum pipe_format fmt = util_format_get_depth_only(view->format);
+
+ switch (view->format) {
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ fmt = PIPE_FORMAT_X24S8_UINT;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ fmt = PIPE_FORMAT_X32_S8X24_UINT;
+ break;
+ default:
+ fmt = view->format;
+ break;
+ }
+
+ if (fmt != view->format) {
+ patched_s_view = *view;
+ patched_s_view.format = fmt;
+ view = &patched_s_view;
+ }
+ views[tex_count++] = view;
+ }
+ } else {
+ for (unsigned i = 0; i < fb->rt_count; i++) {
+ if (fb->rts[i].preload)
+ views[tex_count++] = fb->rts[i].view;
+ }
+ }
+
+ *tex_count_out = tex_count;
+
+ return pan_blitter_emit_textures(pool, tex_count, views);
}
+#if PAN_ARCH >= 8
+/* TODO: cache */
static mali_ptr
-pan_blitter_emit_viewport(struct pan_pool *pool,
- uint16_t minx, uint16_t miny,
- uint16_t maxx, uint16_t maxy)
+pan_blitter_emit_zs(struct pan_pool *pool, bool z, bool s)
{
- struct panfrost_ptr vp = pan_pool_alloc_desc(pool, VIEWPORT);
+ struct panfrost_ptr zsd = pan_pool_alloc_desc(pool, DEPTH_STENCIL);
- pan_pack(vp.cpu, VIEWPORT, cfg) {
- cfg.scissor_minimum_x = minx;
- cfg.scissor_minimum_y = miny;
- cfg.scissor_maximum_x = maxx;
- cfg.scissor_maximum_y = maxy;
- }
+ pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
+ cfg.depth_function = MALI_FUNC_ALWAYS;
+ cfg.depth_write_enable = z;
- return vp.gpu;
-}
+ if (z)
+ cfg.depth_source = MALI_DEPTH_SOURCE_SHADER;
-static void
-pan_preload_emit_dcd(struct pan_pool *pool,
- struct pan_fb_info *fb, bool zs,
- mali_ptr coordinates,
- mali_ptr tsd, mali_ptr rsd,
- void *out, bool always_write)
-{
- pan_pack(out, DRAW, cfg) {
- cfg.four_components_per_vertex = true;
- cfg.draw_descriptor_is_64b = true;
- cfg.thread_storage = tsd;
- cfg.state = rsd;
+ cfg.stencil_test_enable = s;
+ cfg.stencil_from_shader = s;
- cfg.position = coordinates;
- pan_blitter_emit_varying(pool, coordinates, &cfg);
- uint16_t minx = 0, miny = 0, maxx, maxy;
+ cfg.front_compare_function = MALI_FUNC_ALWAYS;
+ cfg.front_stencil_fail = MALI_STENCIL_OP_REPLACE;
+ cfg.front_depth_fail = MALI_STENCIL_OP_REPLACE;
+ cfg.front_depth_pass = MALI_STENCIL_OP_REPLACE;
+ cfg.front_write_mask = 0xFF;
+ cfg.front_value_mask = 0xFF;
-#if PAN_ARCH == 4
- maxx = fb->width - 1;
- maxy = fb->height - 1;
-#else
- /* Align on 32x32 tiles */
- minx = fb->extent.minx & ~31;
- miny = fb->extent.miny & ~31;
- maxx = MIN2(ALIGN_POT(fb->extent.maxx + 1, 32), fb->width) - 1;
- maxy = MIN2(ALIGN_POT(fb->extent.maxy + 1, 32), fb->height) - 1;
-#endif
+ cfg.back_compare_function = MALI_FUNC_ALWAYS;
+ cfg.back_stencil_fail = MALI_STENCIL_OP_REPLACE;
+ cfg.back_depth_fail = MALI_STENCIL_OP_REPLACE;
+ cfg.back_depth_pass = MALI_STENCIL_OP_REPLACE;
+ cfg.back_write_mask = 0xFF;
+ cfg.back_value_mask = 0xFF;
- cfg.viewport =
- pan_blitter_emit_viewport(pool, minx, miny, maxx, maxy);
+ cfg.depth_cull_enable = false;
+ }
- pan_preload_emit_textures(pool, fb, zs, &cfg);
+ return zsd.gpu;
+}
+#else
+static mali_ptr
+pan_blitter_emit_viewport(struct pan_pool *pool, uint16_t minx, uint16_t miny,
+ uint16_t maxx, uint16_t maxy)
+{
+ struct panfrost_ptr vp = pan_pool_alloc_desc(pool, VIEWPORT);
- cfg.samplers = pan_blitter_emit_sampler(pool, true);
- cfg.texture_descriptor_is_64b = PAN_ARCH <= 5;
+ pan_pack(vp.cpu, VIEWPORT, cfg) {
+ cfg.scissor_minimum_x = minx;
+ cfg.scissor_minimum_y = miny;
+ cfg.scissor_maximum_x = maxx;
+ cfg.scissor_maximum_y = maxy;
+ }
-#if PAN_ARCH >= 6
- /* Tiles updated by blit shaders are still considered
- * clean (separate for colour and Z/S), allowing us to
- * suppress unnecessary writeback */
- cfg.clean_fragment_write = !always_write;
-#endif
- }
+ return vp.gpu;
}
+#endif
static void
-pan_blit_emit_dcd(struct pan_pool *pool,
- mali_ptr src_coords, mali_ptr dst_coords,
- mali_ptr textures, mali_ptr samplers,
- mali_ptr vpd, mali_ptr tsd, mali_ptr rsd,
- void *out)
+pan_preload_emit_dcd(struct pan_blitter_cache *cache,
+ struct pan_pool *pool, struct pan_fb_info *fb, bool zs,
+ mali_ptr coordinates, mali_ptr tsd, void *out,
+ bool always_write)
{
- pan_pack(out, DRAW, cfg) {
- cfg.four_components_per_vertex = true;
- cfg.draw_descriptor_is_64b = true;
- cfg.thread_storage = tsd;
- cfg.state = rsd;
-
- cfg.position = dst_coords;
- pan_blitter_emit_varying(pool, src_coords, &cfg);
- cfg.viewport = vpd;
- cfg.texture_descriptor_is_64b = PAN_ARCH <= 5;
- cfg.textures = textures;
- cfg.samplers = samplers;
- }
+ unsigned tex_count = 0;
+ mali_ptr textures = pan_preload_emit_textures(pool, fb, zs, &tex_count);
+ mali_ptr samplers = pan_blitter_emit_sampler(pool, true);
+ mali_ptr varyings = pan_blitter_emit_varying(pool);
+ mali_ptr varying_buffers =
+ pan_blitter_emit_varying_buffer(pool, coordinates);
+
+ /* Tiles updated by blit shaders are still considered clean (separate
+ * for colour and Z/S), allowing us to suppress unnecessary writeback
+ */
+ UNUSED bool clean_fragment_write = !always_write;
+
+ /* Image view used when patching stencil formats for combined
+ * depth/stencil preloads.
+ */
+ struct pan_image_view patched_s;
+
+ struct pan_blitter_views views = pan_preload_get_views(fb, zs, &patched_s);
+
+#if PAN_ARCH <= 7
+ pan_pack(out, DRAW, cfg) {
+ uint16_t minx = 0, miny = 0, maxx, maxy;
+
+ if (PAN_ARCH == 4) {
+ maxx = fb->width - 1;
+ maxy = fb->height - 1;
+ } else {
+ /* Align on 32x32 tiles */
+ minx = fb->extent.minx & ~31;
+ miny = fb->extent.miny & ~31;
+ maxx = MIN2(ALIGN_POT(fb->extent.maxx + 1, 32), fb->width) - 1;
+ maxy = MIN2(ALIGN_POT(fb->extent.maxy + 1, 32), fb->height) - 1;
+ }
+
+ cfg.thread_storage = tsd;
+ cfg.state = pan_blitter_get_rsd(cache, &views);
+
+ cfg.position = coordinates;
+ cfg.viewport = pan_blitter_emit_viewport(pool, minx, miny, maxx, maxy);
+
+ cfg.varyings = varyings;
+ cfg.varying_buffers = varying_buffers;
+ cfg.textures = textures;
+ cfg.samplers = samplers;
+
+#if PAN_ARCH >= 6
+ cfg.clean_fragment_write = clean_fragment_write;
+#endif
+ }
+#else
+ struct panfrost_ptr T;
+ unsigned nr_tables = PAN_BLIT_NUM_RESOURCE_TABLES;
+
+ /* Although individual resources need only 16-byte alignment, the
+ * resource table as a whole must be 64-byte aligned.
+ */
+ T = pan_pool_alloc_aligned(pool, nr_tables * pan_size(RESOURCE), 64);
+ memset(T.cpu, 0, nr_tables * pan_size(RESOURCE));
+
+ panfrost_make_resource_table(T, PAN_BLIT_TABLE_TEXTURE, textures, tex_count);
+ panfrost_make_resource_table(T, PAN_BLIT_TABLE_SAMPLER, samplers, 1);
+ panfrost_make_resource_table(T, PAN_BLIT_TABLE_ATTRIBUTE, varyings, 1);
+ panfrost_make_resource_table(T, PAN_BLIT_TABLE_ATTRIBUTE_BUFFER,
+ varying_buffers, 1);
+
+ struct pan_blit_shader_key key = pan_blitter_get_key(&views);
+ const struct pan_blit_shader_data *blit_shader =
+ pan_blitter_get_blit_shader(cache, &key);
+
+ bool z = fb->zs.preload.z;
+ bool s = fb->zs.preload.s;
+ bool ms = pan_blitter_is_ms(&views);
+
+ struct panfrost_ptr spd = pan_pool_alloc_desc(pool, SHADER_PROGRAM);
+ pan_pack(spd.cpu, SHADER_PROGRAM, cfg) {
+ cfg.stage = MALI_SHADER_STAGE_FRAGMENT;
+ cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
+ cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD;
+ cfg.binary = blit_shader->address;
+ cfg.preload.r48_r63 = blit_shader->info.preload >> 48;
+ }
+
+ unsigned bd_count = views.rt_count;
+ struct panfrost_ptr blend = pan_pool_alloc_desc_array(pool, bd_count, BLEND);
+
+ if (!zs) {
+ pan_blitter_emit_blends(blit_shader, &views, NULL, blend.cpu);
+ }
+
+ pan_pack(out, DRAW, cfg) {
+ if (zs) {
+ /* ZS_EMIT requires late update/kill */
+ cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
+ cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
+ cfg.blend_count = 0;
+ } else {
+ /* Skipping ATEST requires forcing Z/S */
+ cfg.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
+ cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
+
+ cfg.blend = blend.gpu;
+ cfg.blend_count = bd_count;
+ cfg.render_target_mask = 0x1;
+ }
+
+ cfg.allow_forward_pixel_to_kill = !zs;
+ cfg.allow_forward_pixel_to_be_killed = true;
+ cfg.depth_stencil = pan_blitter_emit_zs(pool, z, s);
+ cfg.sample_mask = 0xFFFF;
+ cfg.multisample_enable = ms;
+ cfg.evaluate_per_sample = ms;
+ cfg.maximum_z = 1.0;
+ cfg.clean_fragment_write = clean_fragment_write;
+ cfg.shader.resources = T.gpu | nr_tables;
+ cfg.shader.shader = spd.gpu;
+ cfg.shader.thread_storage = tsd;
+ }
+#endif
}
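
Note the cfg.shader.resources = T.gpu | nr_tables pack above: since the table array is allocated with 64-byte alignment, the low bits of the base address are zero and can carry the table count. A sketch of that encoding, under that alignment assumption:

/* Sketch of the resource-table pointer encoding used above, assuming
 * the 64-byte alignment leaves the low six bits of the base free to
 * hold the number of tables. */
static inline mali_ptr
encode_resource_tables(mali_ptr base, unsigned nr_tables)
{
   assert((base & 63) == 0 && nr_tables < 64);
   return base | nr_tables;
}
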
-static struct panfrost_ptr
-pan_blit_emit_tiler_job(struct pan_pool *desc_pool,
- struct pan_scoreboard *scoreboard,
- mali_ptr src_coords, mali_ptr dst_coords,
- mali_ptr textures, mali_ptr samplers,
- mali_ptr vpd, mali_ptr rsd, mali_ptr tsd,
- mali_ptr tiler)
+#if PAN_ARCH <= 7
+static void *
+pan_blit_emit_tiler_job(struct pan_pool *pool, struct pan_jc *jc,
+ mali_ptr tiler, struct panfrost_ptr *job)
{
- struct panfrost_ptr job =
- pan_pool_alloc_desc(desc_pool, TILER_JOB);
-
- pan_blit_emit_dcd(desc_pool,
- src_coords, dst_coords, textures, samplers,
- vpd, tsd, rsd,
- pan_section_ptr(job.cpu, TILER_JOB, DRAW));
-
- pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
- cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
- cfg.index_count = 4;
- cfg.job_task_split = 6;
- }
-
- pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
- cfg.constant = 1.0f;
- }
-
- void *invoc = pan_section_ptr(job.cpu,
- TILER_JOB,
- INVOCATION);
- panfrost_pack_work_groups_compute(invoc, 1, 4,
- 1, 1, 1, 1, true, false);
+ *job = pan_pool_alloc_desc(pool, TILER_JOB);
+
+ pan_section_pack(job->cpu, TILER_JOB, PRIMITIVE, cfg) {
+ cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
+ cfg.index_count = 4;
+ cfg.job_task_split = 6;
+ }
+
+ pan_section_pack(job->cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
+ cfg.constant = 1.0f;
+ }
+
+ void *invoc = pan_section_ptr(job->cpu, TILER_JOB, INVOCATION);
+ panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false);
#if PAN_ARCH >= 6
- pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg);
- pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
- cfg.address = tiler;
- }
+ pan_section_pack(job->cpu, TILER_JOB, PADDING, cfg)
+ ;
+ pan_section_pack(job->cpu, TILER_JOB, TILER, cfg) {
+ cfg.address = tiler;
+ }
#endif
- panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
- false, false, 0, 0, &job, false);
- return job;
+ pan_jc_add_job(pool, jc, MALI_JOB_TYPE_TILER, false, false, 0, 0, job,
+ false);
+ return pan_section_ptr(job->cpu, TILER_JOB, DRAW);
}
+#endif
#if PAN_ARCH >= 6
static void
pan_preload_fb_alloc_pre_post_dcds(struct pan_pool *desc_pool,
struct pan_fb_info *fb)
{
- if (fb->bifrost.pre_post.dcds.gpu)
- return;
-
- fb->bifrost.pre_post.dcds =
- pan_pool_alloc_desc_aggregate(desc_pool,
- PAN_DESC(DRAW),
- PAN_DESC(DRAW_PADDING),
- PAN_DESC(DRAW),
- PAN_DESC(DRAW_PADDING),
- PAN_DESC(DRAW),
- PAN_DESC(DRAW_PADDING));
+ if (fb->bifrost.pre_post.dcds.gpu)
+ return;
+
+ fb->bifrost.pre_post.dcds = pan_pool_alloc_desc_array(desc_pool, 3, DRAW);
}
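
The pre/post DCD storage changes from three DRAW + DRAW_PADDING pairs to a plain three-element DRAW array, which is why the slot arithmetic in pan_preload_emit_pre_frame_dcd() below drops the padding stride. A sketch of the addressing (hypothetical helper, assuming only what the two hunks show):

/* Old layout: slot i at base + i * (pan_size(DRAW) + pan_size(DRAW_PADDING));
 * new layout: a plain array of DRAW descriptors. */
static inline void *
dcd_slot(void *base, unsigned idx)
{
   return (uint8_t *)base + idx * pan_size(DRAW);
}
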
static void
-pan_preload_emit_pre_frame_dcd(struct pan_pool *desc_pool,
- struct pan_fb_info *fb, bool zs,
- mali_ptr coords, mali_ptr rsd,
+pan_preload_emit_pre_frame_dcd(struct pan_blitter_cache *cache,
+ struct pan_pool *desc_pool,
+ struct pan_fb_info *fb, bool zs, mali_ptr coords,
mali_ptr tsd)
{
- unsigned dcd_idx = zs ? 0 : 1;
- pan_preload_fb_alloc_pre_post_dcds(desc_pool, fb);
- assert(fb->bifrost.pre_post.dcds.cpu);
- void *dcd = fb->bifrost.pre_post.dcds.cpu +
- (dcd_idx * (pan_size(DRAW) + pan_size(DRAW_PADDING)));
-
- int crc_rt = GENX(pan_select_crc_rt)(fb);
-
- bool always_write = false;
-
- /* If CRC data is currently invalid and this batch will make it valid,
- * write even clean tiles to make sure CRC data is updated. */
- if (crc_rt >= 0) {
- bool *valid = fb->rts[crc_rt].crc_valid;
- bool full = !fb->extent.minx && !fb->extent.miny &&
- fb->extent.maxx == (fb->width - 1) &&
- fb->extent.maxy == (fb->height - 1);
-
- if (full && !(*valid))
- always_write = true;
- }
-
- pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, rsd, dcd, always_write);
- if (zs) {
- enum pipe_format fmt = fb->zs.view.zs->image->layout.format;
- bool always = false;
-
- /* If we're dealing with a combined ZS resource and only one
- * component is cleared, we need to reload the whole surface
- * because the zs_clean_pixel_write_enable flag is set in that
- * case.
- */
- if (util_format_is_depth_and_stencil(fmt) &&
- fb->zs.clear.z != fb->zs.clear.s)
- always = true;
-
- /* We could use INTERSECT on Bifrost v7 too, but
- * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
- * buffer one or more tiles ahead, making ZS data immediately
- * available for any ZS tests taking place in other shaders.
- * Things haven't been benchmarked to determine what's
- * preferable (saving bandwidth vs having ZS preloaded
- * earlier), so let's leave it like that for now.
- */
- fb->bifrost.pre_post.modes[dcd_idx] =
- desc_pool->dev->arch > 6 ?
- MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS :
- always ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS :
- MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
- } else {
- fb->bifrost.pre_post.modes[dcd_idx] =
- always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS :
- MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
- }
+ unsigned dcd_idx = zs ? 1 : 0;
+ pan_preload_fb_alloc_pre_post_dcds(desc_pool, fb);
+ assert(fb->bifrost.pre_post.dcds.cpu);
+ void *dcd = fb->bifrost.pre_post.dcds.cpu + (dcd_idx * pan_size(DRAW));
+
+ /* We only use crc_rt to determine whether to force writes for updating
+ * the CRCs, so use a conservative tile size (16x16).
+ */
+ int crc_rt = GENX(pan_select_crc_rt)(fb, 16 * 16);
+
+ bool always_write = false;
+
+ /* If CRC data is currently invalid and this batch will make it valid,
+ * write even clean tiles to make sure CRC data is updated. */
+ if (crc_rt >= 0) {
+ bool *valid = fb->rts[crc_rt].crc_valid;
+ bool full = !fb->extent.minx && !fb->extent.miny &&
+ fb->extent.maxx == (fb->width - 1) &&
+ fb->extent.maxy == (fb->height - 1);
+
+ if (full && !(*valid))
+ always_write = true;
+ }
+
+ pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd, dcd,
+ always_write);
+ if (zs) {
+ enum pipe_format fmt = fb->zs.view.zs
+ ? fb->zs.view.zs->planes[0]->layout.format
+ : fb->zs.view.s->planes[0]->layout.format;
+ bool always = false;
+
+ /* If we're dealing with a combined ZS resource and only one
+ * component is cleared, we need to reload the whole surface
+ * because the zs_clean_pixel_write_enable flag is set in that
+ * case.
+ */
+ if (util_format_is_depth_and_stencil(fmt) &&
+ fb->zs.clear.z != fb->zs.clear.s)
+ always = true;
+
+ /* We could use INTERSECT on Bifrost v7 too, but
+ * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
+ * buffer one or more tiles ahead, making ZS data immediately
+ * available for any ZS tests taking place in other shaders.
+ * Things haven't been benchmarked to determine what's
+ * preferable (saving bandwidth vs having ZS preloaded
+ * earlier), so let's leave it like that for now.
+ */
+ fb->bifrost.pre_post.modes[dcd_idx] =
+ PAN_ARCH > 6
+ ? MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS
+ : always ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS
+ : MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
+ } else {
+ fb->bifrost.pre_post.modes[dcd_idx] =
+ always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS
+ : MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
+ }
}
#else
-static void
-pan_preload_emit_tiler_job(struct pan_pool *desc_pool,
- struct pan_scoreboard *scoreboard,
- struct pan_fb_info *fb, bool zs,
- mali_ptr coords, mali_ptr rsd, mali_ptr tsd)
+static struct panfrost_ptr
+pan_preload_emit_tiler_job(struct pan_blitter_cache *cache,
+ struct pan_pool *desc_pool, struct pan_jc *jc,
+ struct pan_fb_info *fb, bool zs, mali_ptr coords,
+ mali_ptr tsd)
{
- struct panfrost_ptr job =
- pan_pool_alloc_desc(desc_pool, TILER_JOB);
-
- pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, rsd,
- pan_section_ptr(job.cpu, TILER_JOB, DRAW),
- false);
-
- pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
- cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
- cfg.index_count = 4;
- cfg.job_task_split = 6;
- }
-
- pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
- cfg.constant = 1.0f;
- }
-
- void *invoc = pan_section_ptr(job.cpu,
- TILER_JOB,
- INVOCATION);
- panfrost_pack_work_groups_compute(invoc, 1, 4,
- 1, 1, 1, 1, true, false);
-
- panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
- false, false, 0, 0, &job, true);
+ struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, TILER_JOB);
+
+ pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd,
+ pan_section_ptr(job.cpu, TILER_JOB, DRAW), false);
+
+ pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
+ cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
+ cfg.index_count = 4;
+ cfg.job_task_split = 6;
+ }
+
+ pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
+ cfg.constant = 1.0f;
+ }
+
+ void *invoc = pan_section_ptr(job.cpu, TILER_JOB, INVOCATION);
+ panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false);
+
+ pan_jc_add_job(desc_pool, jc, MALI_JOB_TYPE_TILER, false, false, 0, 0, &job,
+ true);
+ return job;
}
#endif
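
The CRC forcing rule in pan_preload_emit_pre_frame_dcd() above condenses to a single predicate; a minimal restatement (hypothetical helper name, logic taken directly from the hunk):

static bool
preload_must_always_write(const struct pan_fb_info *fb, int crc_rt)
{
   if (crc_rt < 0)
      return false;

   /* A full-frame batch that will validate currently-invalid CRC data
    * must write back even clean tiles so the CRCs get updated. */
   bool full = !fb->extent.minx && !fb->extent.miny &&
               fb->extent.maxx == (fb->width - 1) &&
               fb->extent.maxy == (fb->height - 1);

   return full && !(*fb->rts[crc_rt].crc_valid);
}
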
-static void
-pan_preload_fb_part(struct pan_pool *pool,
- struct pan_scoreboard *scoreboard,
- struct pan_fb_info *fb, bool zs,
+static struct panfrost_ptr
+pan_preload_fb_part(struct pan_blitter_cache *cache, struct pan_pool *pool,
+ struct pan_jc *jc, struct pan_fb_info *fb, bool zs,
mali_ptr coords, mali_ptr tsd, mali_ptr tiler)
{
- struct panfrost_device *dev = pool->dev;
- mali_ptr rsd = pan_preload_get_rsd(dev, fb, zs);
+ struct panfrost_ptr job = {0};
#if PAN_ARCH >= 6
- pan_preload_emit_pre_frame_dcd(pool, fb, zs,
- coords, rsd, tsd);
+ pan_preload_emit_pre_frame_dcd(cache, pool, fb, zs, coords, tsd);
#else
- pan_preload_emit_tiler_job(pool, scoreboard,
- fb, zs, coords, rsd, tsd);
+ job = pan_preload_emit_tiler_job(cache, pool, jc, fb, zs, coords, tsd);
#endif
+ return job;
}
-void
-GENX(pan_preload_fb)(struct pan_pool *pool,
- struct pan_scoreboard *scoreboard,
- struct pan_fb_info *fb,
- mali_ptr tsd, mali_ptr tiler)
+unsigned
+GENX(pan_preload_fb)(struct pan_blitter_cache *cache, struct pan_pool *pool,
+ struct pan_jc *jc, struct pan_fb_info *fb, mali_ptr tsd,
+ mali_ptr tiler, struct panfrost_ptr *jobs)
{
- bool preload_zs = pan_preload_needed(fb, true);
- bool preload_rts = pan_preload_needed(fb, false);
- mali_ptr coords;
-
- if (!preload_zs && !preload_rts)
- return;
-
- float rect[] = {
- 0.0, 0.0, 0.0, 1.0,
- fb->width, 0.0, 0.0, 1.0,
- 0.0, fb->height, 0.0, 1.0,
- fb->width, fb->height, 0.0, 1.0,
- };
-
- coords = pan_pool_upload_aligned(pool, rect,
- sizeof(rect), 64);
-
- if (preload_zs)
- pan_preload_fb_part(pool, scoreboard, fb, true, coords,
- tsd, tiler);
-
- if (preload_rts)
- pan_preload_fb_part(pool, scoreboard, fb, false, coords,
- tsd, tiler);
+ bool preload_zs = pan_preload_needed(fb, true);
+ bool preload_rts = pan_preload_needed(fb, false);
+ mali_ptr coords;
+
+ if (!preload_zs && !preload_rts)
+ return 0;
+
+ float rect[] = {
+ 0.0, 0.0, 0.0, 1.0, fb->width, 0.0, 0.0, 1.0,
+ 0.0, fb->height, 0.0, 1.0, fb->width, fb->height, 0.0, 1.0,
+ };
+
+ coords = pan_pool_upload_aligned(pool, rect, sizeof(rect), 64);
+
+ unsigned njobs = 0;
+ if (preload_zs) {
+ struct panfrost_ptr job =
+ pan_preload_fb_part(cache, pool, jc, fb, true, coords, tsd, tiler);
+ if (jobs && job.cpu)
+ jobs[njobs++] = job;
+ }
+
+ if (preload_rts) {
+ struct panfrost_ptr job =
+ pan_preload_fb_part(cache, pool, jc, fb, false, coords, tsd, tiler);
+ if (jobs && job.cpu)
+ jobs[njobs++] = job;
+ }
+
+ return njobs;
}
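
A hypothetical call site for the new signature (the dev/batch names are assumptions, not from this patch). Note that on v6+ the preloads are emitted as pre-frame shaders rather than tiler jobs, so the returned job count can be zero even when preloading takes place:

struct panfrost_ptr preload_jobs[2];
unsigned nr_jobs =
   GENX(pan_preload_fb)(&dev->blitter_cache, &batch->pool, &batch->jc,
                        &batch->fb_info, batch->tls.gpu, batch->tiler_ctx,
                        preload_jobs);

/* On v4/v5, nr_jobs tiler jobs were queued on the job chain and can be
 * used for dependency tracking; on v6+ this is always 0. */
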
+#if PAN_ARCH <= 7
void
-GENX(pan_blit_ctx_init)(struct panfrost_device *dev,
+GENX(pan_blit_ctx_init)(struct pan_blitter_cache *cache,
const struct pan_blit_info *info,
struct pan_pool *blit_pool,
struct pan_blit_context *ctx)
{
- memset(ctx, 0, sizeof(*ctx));
-
- ctx->z_scale = (float)(info->dst.end.z - info->dst.start.z + 1) /
- (info->src.end.z - info->src.start.z + 1);
-
- struct pan_image_view sviews[2] = {
- {
- .format = info->src.planes[0].format,
- .image = info->src.planes[0].image,
- .dim = info->src.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
- MALI_TEXTURE_DIMENSION_2D : info->src.planes[0].image->layout.dim,
- .first_level = info->src.level,
- .last_level = info->src.level,
- .first_layer = info->src.start.layer,
- .last_layer = info->src.end.layer,
- .swizzle = {
- PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
- PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
- },
- },
- };
-
- struct pan_image_view dview = {
- .format = info->dst.planes[0].format,
- .image = info->dst.planes[0].image,
- .dim = info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_1D ?
- MALI_TEXTURE_DIMENSION_1D : MALI_TEXTURE_DIMENSION_2D,
- .first_level = info->dst.level,
- .last_level = info->dst.level,
- .first_layer = info->dst.start.layer,
- .last_layer = info->dst.start.layer,
- .swizzle = {
- PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
- PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
- },
- };
-
- ctx->src.start.x = info->src.start.x;
- ctx->src.start.y = info->src.start.y;
- ctx->src.end.x = info->src.end.x;
- ctx->src.end.y = info->src.end.y;
- ctx->src.dim = sviews[0].dim;
- if (sviews[0].dim == MALI_TEXTURE_DIMENSION_3D)
- ctx->src.z_offset = info->src.start.z;
- else
- ctx->src.layer_offset = info->src.start.layer;
-
- if (info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_3D) {
- ctx->dst.layer_offset = info->dst.start.z;
- ctx->dst.cur_layer = info->dst.start.z;
- ctx->dst.last_layer = info->dst.end.z;
- } else {
- ctx->dst.layer_offset = info->dst.start.layer;
- ctx->dst.cur_layer = info->dst.start.layer;
- ctx->dst.last_layer = info->dst.end.layer;
- }
-
- /* Split depth and stencil */
- if (util_format_is_depth_and_stencil(sviews[0].format)) {
- sviews[1] = sviews[0];
- sviews[0].format = util_format_get_depth_only(sviews[0].format);
- sviews[1].format = util_format_stencil_only(sviews[1].format);
- } else if (info->src.planes[1].format) {
- sviews[1] = sviews[0];
- sviews[1].format = info->src.planes[1].format;
- sviews[1].image = info->src.planes[1].image;
- }
-
- ctx->rsd = pan_blit_get_rsd(dev, sviews, &dview);
-
- ASSERTED unsigned nlayers = info->src.end.layer - info->src.start.layer + 1;
-
- assert(nlayers == (info->dst.end.layer - info->dst.start.layer + 1));
-
- unsigned dst_w = u_minify(info->dst.planes[0].image->layout.width, info->dst.level);
- unsigned dst_h = u_minify(info->dst.planes[0].image->layout.height, info->dst.level);
- unsigned maxx = MIN2(MAX2(info->dst.start.x, info->dst.end.x), dst_w - 1);
- unsigned maxy = MIN2(MAX2(info->dst.start.y, info->dst.end.y), dst_h - 1);
- unsigned minx = MAX2(MIN3(info->dst.start.x, info->dst.end.x, maxx), 0);
- unsigned miny = MAX2(MIN3(info->dst.start.y, info->dst.end.y, maxy), 0);
-
- if (info->scissor.enable) {
- minx = MAX2(minx, info->scissor.minx);
- miny = MAX2(miny, info->scissor.miny);
- maxx = MIN2(maxx, info->scissor.maxx);
- maxy = MIN2(maxy, info->scissor.maxy);
- }
-
- const struct pan_image_view *sview_ptrs[] = { &sviews[0], &sviews[1] };
- unsigned nviews = sviews[1].format ? 2 : 1;
-
- ctx->textures = pan_blitter_emit_textures(blit_pool, nviews, sview_ptrs);
- ctx->samplers = pan_blitter_emit_sampler(blit_pool, info->nearest);
-
- ctx->vpd = pan_blitter_emit_viewport(blit_pool,
- minx, miny, maxx, maxy);
-
- float dst_rect[] = {
- info->dst.start.x, info->dst.start.y, 0.0, 1.0,
- info->dst.end.x + 1, info->dst.start.y, 0.0, 1.0,
- info->dst.start.x, info->dst.end.y + 1, 0.0, 1.0,
- info->dst.end.x + 1, info->dst.end.y + 1, 0.0, 1.0,
- };
-
- ctx->position =
- pan_pool_upload_aligned(blit_pool, dst_rect,
- sizeof(dst_rect), 64);
+ memset(ctx, 0, sizeof(*ctx));
+
+ struct pan_image_view sviews[2] = {
+ {
+ .format = info->src.planes[0].format,
+ .planes =
+ {
+ info->src.planes[0].image,
+ info->src.planes[1].image,
+ info->src.planes[2].image,
+ },
+ .dim =
+ info->src.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_CUBE
+ ? MALI_TEXTURE_DIMENSION_2D
+ : info->src.planes[0].image->layout.dim,
+ .first_level = info->src.level,
+ .last_level = info->src.level,
+ .first_layer = info->src.start.layer,
+ .last_layer = info->src.end.layer,
+ .swizzle =
+ {
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_Y,
+ PIPE_SWIZZLE_Z,
+ PIPE_SWIZZLE_W,
+ },
+ },
+ };
+
+ struct pan_image_view dview = {
+ .format = info->dst.planes[0].format,
+ .planes =
+ {
+ info->dst.planes[0].image,
+ info->dst.planes[1].image,
+ info->dst.planes[2].image,
+ },
+ .dim = info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_1D
+ ? MALI_TEXTURE_DIMENSION_1D
+ : MALI_TEXTURE_DIMENSION_2D,
+ .first_level = info->dst.level,
+ .last_level = info->dst.level,
+ .first_layer = info->dst.start.layer,
+ .last_layer = info->dst.start.layer,
+ .swizzle =
+ {
+ PIPE_SWIZZLE_X,
+ PIPE_SWIZZLE_Y,
+ PIPE_SWIZZLE_Z,
+ PIPE_SWIZZLE_W,
+ },
+ };
+
+ ctx->src.start.x = info->src.start.x;
+ ctx->src.start.y = info->src.start.y;
+ ctx->src.end.x = info->src.end.x;
+ ctx->src.end.y = info->src.end.y;
+ ctx->src.dim = sviews[0].dim;
+
+ if (info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_3D) {
+ unsigned max_z =
+ u_minify(info->dst.planes[0].image->layout.depth, info->dst.level) - 1;
+
+ ctx->z_scale = (float)(info->src.end.z - info->src.start.z) /
+ (info->dst.end.z - info->dst.start.z);
+ assert(info->dst.start.z != info->dst.end.z);
+ if (info->dst.start.z > info->dst.end.z) {
+ ctx->dst.cur_layer = info->dst.start.z - 1;
+ ctx->dst.last_layer = info->dst.end.z;
+ } else {
+ ctx->dst.cur_layer = info->dst.start.z;
+ ctx->dst.last_layer = info->dst.end.z - 1;
+ }
+ ctx->dst.cur_layer = MIN2(MAX2(ctx->dst.cur_layer, 0), max_z);
+ ctx->dst.last_layer = MIN2(MAX2(ctx->dst.last_layer, 0), max_z);
+ ctx->dst.layer_offset = ctx->dst.cur_layer;
+ } else {
+ unsigned max_layer = info->dst.planes[0].image->layout.array_size - 1;
+ ctx->dst.layer_offset = info->dst.start.layer;
+ ctx->dst.cur_layer = info->dst.start.layer;
+ ctx->dst.last_layer = MIN2(info->dst.end.layer, max_layer);
+ ctx->z_scale = 1;
+ }
+
+ if (sviews[0].dim == MALI_TEXTURE_DIMENSION_3D) {
+ if (info->src.start.z < info->src.end.z)
+ ctx->src.z_offset = info->src.start.z + fabs(ctx->z_scale * 0.5f);
+ else
+ ctx->src.z_offset = info->src.start.z - fabs(ctx->z_scale * 0.5f);
+ } else {
+ ctx->src.layer_offset = info->src.start.layer;
+ }
+
+ /* Split depth and stencil */
+ if (util_format_is_depth_and_stencil(sviews[0].format)) {
+ sviews[1] = sviews[0];
+ sviews[0].format = util_format_get_depth_only(sviews[0].format);
+ sviews[1].format = util_format_stencil_only(sviews[1].format);
+ } else if (info->src.planes[1].format) {
+ sviews[1] = sviews[0];
+ sviews[1].format = info->src.planes[1].format;
+ sviews[1].planes[0] = info->src.planes[1].image;
+ }
+
+ ctx->rsd = pan_blit_get_rsd(cache, sviews, &dview);
+
+ ASSERTED unsigned nlayers = info->src.end.layer - info->src.start.layer + 1;
+
+ assert(nlayers == (info->dst.end.layer - info->dst.start.layer + 1));
+
+ unsigned dst_w =
+ u_minify(info->dst.planes[0].image->layout.width, info->dst.level);
+ unsigned dst_h =
+ u_minify(info->dst.planes[0].image->layout.height, info->dst.level);
+ unsigned maxx = MIN2(MAX2(info->dst.start.x, info->dst.end.x), dst_w - 1);
+ unsigned maxy = MIN2(MAX2(info->dst.start.y, info->dst.end.y), dst_h - 1);
+ unsigned minx = MAX2(MIN3(info->dst.start.x, info->dst.end.x, maxx), 0);
+ unsigned miny = MAX2(MIN3(info->dst.start.y, info->dst.end.y, maxy), 0);
+
+ if (info->scissor.enable) {
+ minx = MAX2(minx, info->scissor.minx);
+ miny = MAX2(miny, info->scissor.miny);
+ maxx = MIN2(maxx, info->scissor.maxx);
+ maxy = MIN2(maxy, info->scissor.maxy);
+ }
+
+ const struct pan_image_view *sview_ptrs[] = {&sviews[0], &sviews[1]};
+ unsigned nviews = sviews[1].format ? 2 : 1;
+
+ ctx->textures = pan_blitter_emit_textures(blit_pool, nviews, sview_ptrs);
+ ctx->samplers = pan_blitter_emit_sampler(blit_pool, info->nearest);
+
+ ctx->vpd = pan_blitter_emit_viewport(blit_pool, minx, miny, maxx, maxy);
+
+ float dst_rect[] = {
+ info->dst.start.x, info->dst.start.y, 0.0, 1.0,
+ info->dst.end.x, info->dst.start.y, 0.0, 1.0,
+ info->dst.start.x, info->dst.end.y, 0.0, 1.0,
+ info->dst.end.x, info->dst.end.y, 0.0, 1.0,
+ };
+
+ ctx->position =
+ pan_pool_upload_aligned(blit_pool, dst_rect, sizeof(dst_rect), 64);
}
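
With the scale and half-slice bias computed above, GENX(pan_blit) below derives the source Z for each destination layer. A worked restatement, assuming the exclusive end coordinates that the clamping logic implies:

static float
blit_src_z(const struct pan_blit_context *ctx, int32_t layer)
{
   /* e.g. src slices [0, 8) onto dst layers [0, 4): z_scale = 2.0 and
    * z_offset = 1.0, so layers 0..3 sample src Z = 1, 3, 5, 7, the
    * centre of each two-slice span. */
   return ctx->z_scale * layer + ctx->src.z_offset;
}
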
struct panfrost_ptr
-GENX(pan_blit)(struct pan_blit_context *ctx,
- struct pan_pool *pool,
- struct pan_scoreboard *scoreboard,
- mali_ptr tsd, mali_ptr tiler)
+GENX(pan_blit)(struct pan_blit_context *ctx, struct pan_pool *pool,
+ struct pan_jc *jc, mali_ptr tsd, mali_ptr tiler)
{
- if (ctx->dst.cur_layer < 0 || ctx->dst.cur_layer > ctx->dst.last_layer)
- return (struct panfrost_ptr){ 0 };
-
- int32_t layer = ctx->dst.cur_layer - ctx->dst.layer_offset;
- float src_z;
- if (ctx->src.dim == MALI_TEXTURE_DIMENSION_3D)
- src_z = (ctx->z_scale * layer) + ctx->src.z_offset;
- else
- src_z = ctx->src.layer_offset + layer;
-
- float src_rect[] = {
- ctx->src.start.x, ctx->src.start.y, src_z, 1.0,
- ctx->src.end.x + 1, ctx->src.start.y, src_z, 1.0,
- ctx->src.start.x, ctx->src.end.y + 1, src_z, 1.0,
- ctx->src.end.x + 1, ctx->src.end.y + 1, src_z, 1.0,
- };
-
- mali_ptr src_coords =
- pan_pool_upload_aligned(pool, src_rect,
- sizeof(src_rect), 64);
-
- return pan_blit_emit_tiler_job(pool, scoreboard,
- src_coords, ctx->position,
- ctx->textures, ctx->samplers,
- ctx->vpd, ctx->rsd, tsd, tiler);
-}
-
-static uint32_t pan_blit_shader_key_hash(const void *key)
-{
- return _mesa_hash_data(key, sizeof(struct pan_blit_shader_key));
-}
-
-static bool pan_blit_shader_key_equal(const void *a, const void *b)
-{
- return !memcmp(a, b, sizeof(struct pan_blit_shader_key));
-}
-
-static uint32_t pan_blit_blend_shader_key_hash(const void *key)
-{
- return _mesa_hash_data(key, sizeof(struct pan_blit_blend_shader_key));
-}
-
-static bool pan_blit_blend_shader_key_equal(const void *a, const void *b)
-{
- return !memcmp(a, b, sizeof(struct pan_blit_blend_shader_key));
-}
-
-static uint32_t pan_blit_rsd_key_hash(const void *key)
-{
- return _mesa_hash_data(key, sizeof(struct pan_blit_rsd_key));
+ if (ctx->dst.cur_layer < 0 ||
+ (ctx->dst.last_layer >= ctx->dst.layer_offset &&
+ ctx->dst.cur_layer > ctx->dst.last_layer) ||
+ (ctx->dst.last_layer < ctx->dst.layer_offset &&
+ ctx->dst.cur_layer < ctx->dst.last_layer))
+ return (struct panfrost_ptr){0};
+
+ int32_t layer = ctx->dst.cur_layer - ctx->dst.layer_offset;
+ float src_z;
+ if (ctx->src.dim == MALI_TEXTURE_DIMENSION_3D)
+ src_z = (ctx->z_scale * layer) + ctx->src.z_offset;
+ else
+ src_z = ctx->src.layer_offset + layer;
+
+ float src_rect[] = {
+ ctx->src.start.x, ctx->src.start.y, src_z, 1.0,
+ ctx->src.end.x, ctx->src.start.y, src_z, 1.0,
+ ctx->src.start.x, ctx->src.end.y, src_z, 1.0,
+ ctx->src.end.x, ctx->src.end.y, src_z, 1.0,
+ };
+
+ mali_ptr src_coords =
+ pan_pool_upload_aligned(pool, src_rect, sizeof(src_rect), 64);
+
+ struct panfrost_ptr job = {0};
+ void *dcd = pan_blit_emit_tiler_job(pool, jc, tiler, &job);
+
+ pan_pack(dcd, DRAW, cfg) {
+ cfg.thread_storage = tsd;
+ cfg.state = ctx->rsd;
+
+ cfg.position = ctx->position;
+ cfg.varyings = pan_blitter_emit_varying(pool);
+ cfg.varying_buffers = pan_blitter_emit_varying_buffer(pool, src_coords);
+ cfg.viewport = ctx->vpd;
+ cfg.textures = ctx->textures;
+ cfg.samplers = ctx->samplers;
+ }
+
+ return job;
}
+#endif
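
A hypothetical per-layer loop over GENX(pan_blit); the helper that advances dst.cur_layer between iterations is not part of this hunk, so treat that step as an assumption:

for (;;) {
   struct panfrost_ptr job = GENX(pan_blit)(&ctx, &pool, &jc, tsd, tiler);
   if (!job.gpu)
      break; /* all destination layers emitted */

   /* ... advance ctx.dst.cur_layer towards ctx.dst.last_layer ... */
}
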
-static bool pan_blit_rsd_key_equal(const void *a, const void *b)
-{
- return !memcmp(a, b, sizeof(struct pan_blit_rsd_key));
-}
+DERIVE_HASH_TABLE(pan_blit_shader_key);
+DERIVE_HASH_TABLE(pan_blit_blend_shader_key);
+DERIVE_HASH_TABLE(pan_blit_rsd_key);
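
DERIVE_HASH_TABLE replaces the handwritten hash/equal helpers deleted above. A sketch of what it provides for one key type, modelled on that removed code and on the pan_blit_shader_key_table_create() call below (the exact macro expansion is an assumption):

static uint32_t
pan_blit_shader_key_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(struct pan_blit_shader_key));
}

static bool
pan_blit_shader_key_equal(const void *a, const void *b)
{
   return !memcmp(a, b, sizeof(struct pan_blit_shader_key));
}

static struct hash_table *
pan_blit_shader_key_table_create(void *mem_ctx)
{
   return _mesa_hash_table_create(mem_ctx, pan_blit_shader_key_hash,
                                  pan_blit_shader_key_equal);
}
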
static void
-pan_blitter_prefill_blit_shader_cache(struct panfrost_device *dev)
+pan_blitter_prefill_blit_shader_cache(struct pan_blitter_cache *cache)
{
- static const struct pan_blit_shader_key prefill[] = {
- {
- .surfaces[0] = {
- .loc = FRAG_RESULT_DEPTH,
- .type = nir_type_float32,
- .dim = MALI_TEXTURE_DIMENSION_2D,
- .src_samples = 1,
- .dst_samples = 1,
- },
- },
- {
- .surfaces[1] = {
- .loc = FRAG_RESULT_STENCIL,
- .type = nir_type_uint32,
- .dim = MALI_TEXTURE_DIMENSION_2D,
- .src_samples = 1,
- .dst_samples = 1,
- },
- },
- {
- .surfaces[0] = {
- .loc = FRAG_RESULT_DATA0,
- .type = nir_type_float32,
- .dim = MALI_TEXTURE_DIMENSION_2D,
- .src_samples = 1,
- .dst_samples = 1,
- },
- },
- };
-
- for (unsigned i = 0; i < ARRAY_SIZE(prefill); i++)
- pan_blitter_get_blit_shader(dev, &prefill[i]);
+ static const struct pan_blit_shader_key prefill[] = {
+ {
+ .surfaces[0] =
+ {
+ .loc = FRAG_RESULT_DEPTH,
+ .type = nir_type_float32,
+ .dim = MALI_TEXTURE_DIMENSION_2D,
+ .src_samples = 1,
+ .dst_samples = 1,
+ },
+ },
+ {
+ .surfaces[1] =
+ {
+ .loc = FRAG_RESULT_STENCIL,
+ .type = nir_type_uint32,
+ .dim = MALI_TEXTURE_DIMENSION_2D,
+ .src_samples = 1,
+ .dst_samples = 1,
+ },
+ },
+ {
+ .surfaces[0] =
+ {
+ .loc = FRAG_RESULT_DATA0,
+ .type = nir_type_float32,
+ .dim = MALI_TEXTURE_DIMENSION_2D,
+ .src_samples = 1,
+ .dst_samples = 1,
+ },
+ },
+ };
+
+ for (unsigned i = 0; i < ARRAY_SIZE(prefill); i++)
+ pan_blitter_get_blit_shader(cache, &prefill[i]);
}
void
-GENX(pan_blitter_init)(struct panfrost_device *dev,
- struct pan_pool *bin_pool,
- struct pan_pool *desc_pool)
+GENX(pan_blitter_cache_init)(struct pan_blitter_cache *cache,
+ unsigned gpu_id,
+ struct pan_blend_shader_cache *blend_shader_cache,
+ struct pan_pool *bin_pool,
+ struct pan_pool *desc_pool)
{
- dev->blitter.shaders.blit =
- _mesa_hash_table_create(NULL, pan_blit_shader_key_hash,
- pan_blit_shader_key_equal);
- dev->blitter.shaders.blend =
- _mesa_hash_table_create(NULL, pan_blit_blend_shader_key_hash,
- pan_blit_blend_shader_key_equal);
- dev->blitter.shaders.pool = bin_pool;
- pthread_mutex_init(&dev->blitter.shaders.lock, NULL);
- pan_blitter_prefill_blit_shader_cache(dev);
-
- dev->blitter.rsds.pool = desc_pool;
- dev->blitter.rsds.rsds =
- _mesa_hash_table_create(NULL, pan_blit_rsd_key_hash,
- pan_blit_rsd_key_equal);
- pthread_mutex_init(&dev->blitter.rsds.lock, NULL);
+ cache->gpu_id = gpu_id;
+ cache->shaders.blit = pan_blit_shader_key_table_create(NULL);
+ cache->shaders.blend = pan_blit_blend_shader_key_table_create(NULL);
+ cache->shaders.pool = bin_pool;
+ pthread_mutex_init(&cache->shaders.lock, NULL);
+ pan_blitter_prefill_blit_shader_cache(cache);
+
+ cache->rsds.pool = desc_pool;
+ cache->rsds.rsds = pan_blit_rsd_key_table_create(NULL);
+ pthread_mutex_init(&cache->rsds.lock, NULL);
+ cache->blend_shader_cache = blend_shader_cache;
}
void
-GENX(pan_blitter_cleanup)(struct panfrost_device *dev)
+GENX(pan_blitter_cache_cleanup)(struct pan_blitter_cache *cache)
{
- _mesa_hash_table_destroy(dev->blitter.shaders.blit, NULL);
- _mesa_hash_table_destroy(dev->blitter.shaders.blend, NULL);
- pthread_mutex_destroy(&dev->blitter.shaders.lock);
- _mesa_hash_table_destroy(dev->blitter.rsds.rsds, NULL);
- pthread_mutex_destroy(&dev->blitter.rsds.lock);
+ _mesa_hash_table_destroy(cache->shaders.blit, NULL);
+ _mesa_hash_table_destroy(cache->shaders.blend, NULL);
+ pthread_mutex_destroy(&cache->shaders.lock);
+ _mesa_hash_table_destroy(cache->rsds.rsds, NULL);
+ pthread_mutex_destroy(&cache->rsds.lock);
}