diff options
Diffstat (limited to 'src/gallium/drivers/crocus/crocus_blit.c')
-rw-r--r-- | src/gallium/drivers/crocus/crocus_blit.c | 836 |
1 files changed, 836 insertions, 0 deletions
diff --git a/src/gallium/drivers/crocus/crocus_blit.c b/src/gallium/drivers/crocus/crocus_blit.c new file mode 100644 index 00000000000..9cae82e3e2d --- /dev/null +++ b/src/gallium/drivers/crocus/crocus_blit.c @@ -0,0 +1,836 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <stdio.h> +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "util/format/u_format.h" +#include "util/u_inlines.h" +#include "util/u_surface.h" +#include "util/ralloc.h" +#include "intel/blorp/blorp.h" +#include "crocus_context.h" +#include "crocus_resource.h" +#include "crocus_screen.h" + +void crocus_blitter_begin(struct crocus_context *ice, enum crocus_blitter_op op, bool render_cond) +{ + util_blitter_save_vertex_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_VERTEX]); + util_blitter_save_tessctrl_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL]); + util_blitter_save_tesseval_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]); + util_blitter_save_geometry_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]); + util_blitter_save_so_targets(ice->blitter, ice->state.so_targets, + (struct pipe_stream_output_target**)ice->state.so_target); + util_blitter_save_vertex_buffer_slot(ice->blitter, ice->state.vertex_buffers); + util_blitter_save_vertex_elements(ice->blitter, (void *)ice->state.cso_vertex_elements); + if (op & CROCUS_SAVE_FRAGMENT_STATE) { + util_blitter_save_blend(ice->blitter, ice->state.cso_blend); + util_blitter_save_depth_stencil_alpha(ice->blitter, ice->state.cso_zsa); + util_blitter_save_stencil_ref(ice->blitter, &ice->state.stencil_ref); + util_blitter_save_fragment_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]); + util_blitter_save_sample_mask(ice->blitter, ice->state.sample_mask); + util_blitter_save_rasterizer(ice->blitter, ice->state.cso_rast); + util_blitter_save_scissor(ice->blitter, &ice->state.scissors[0]); + util_blitter_save_viewport(ice->blitter, &ice->state.viewports[0]); + util_blitter_save_fragment_constant_buffer_slot(ice->blitter, &ice->state.shaders[MESA_SHADER_FRAGMENT].constbufs[0]); + } + + if (!render_cond) + util_blitter_save_render_condition(ice->blitter, + (struct pipe_query *)ice->condition.query, + ice->condition.condition, + ice->condition.mode); + +// util_blitter_save_scissor(ice->blitter, &ice->scissors[0]); + if (op & CROCUS_SAVE_FRAMEBUFFER) + util_blitter_save_framebuffer(ice->blitter, &ice->state.framebuffer); + + if (op & CROCUS_SAVE_TEXTURES) { + util_blitter_save_fragment_sampler_states(ice->blitter, 1, (void **)ice->state.shaders[MESA_SHADER_FRAGMENT].samplers); + util_blitter_save_fragment_sampler_views(ice->blitter, 1, (struct pipe_sampler_view **)ice->state.shaders[MESA_SHADER_FRAGMENT].textures); + } +} + +/** + * Helper function for handling mirror image blits. + * + * If coord0 > coord1, swap them and return "true" (mirrored). + */ +static bool +apply_mirror(float *coord0, float *coord1) +{ + if (*coord0 > *coord1) { + float tmp = *coord0; + *coord0 = *coord1; + *coord1 = tmp; + return true; + } + return false; +} + +/** + * Compute the number of pixels to clip for each side of a rect + * + * \param x0 The rect's left coordinate + * \param y0 The rect's bottom coordinate + * \param x1 The rect's right coordinate + * \param y1 The rect's top coordinate + * \param min_x The clipping region's left coordinate + * \param min_y The clipping region's bottom coordinate + * \param max_x The clipping region's right coordinate + * \param max_y The clipping region's top coordinate + * \param clipped_x0 The number of pixels to clip from the left side + * \param clipped_y0 The number of pixels to clip from the bottom side + * \param clipped_x1 The number of pixels to clip from the right side + * \param clipped_y1 The number of pixels to clip from the top side + * + * \return false if we clip everything away, true otherwise + */ +static inline bool +compute_pixels_clipped(float x0, float y0, float x1, float y1, + float min_x, float min_y, float max_x, float max_y, + float *clipped_x0, float *clipped_y0, + float *clipped_x1, float *clipped_y1) +{ + /* If we are going to clip everything away, stop. */ + if (!(min_x <= max_x && + min_y <= max_y && + x0 <= max_x && + y0 <= max_y && + min_x <= x1 && + min_y <= y1 && + x0 <= x1 && + y0 <= y1)) { + return false; + } + + if (x0 < min_x) + *clipped_x0 = min_x - x0; + else + *clipped_x0 = 0; + if (max_x < x1) + *clipped_x1 = x1 - max_x; + else + *clipped_x1 = 0; + + if (y0 < min_y) + *clipped_y0 = min_y - y0; + else + *clipped_y0 = 0; + if (max_y < y1) + *clipped_y1 = y1 - max_y; + else + *clipped_y1 = 0; + + return true; +} + +/** + * Clips a coordinate (left, right, top or bottom) for the src or dst rect + * (whichever requires the largest clip) and adjusts the coordinate + * for the other rect accordingly. + * + * \param mirror true if mirroring is required + * \param src the source rect coordinate (for example src_x0) + * \param dst0 the dst rect coordinate (for example dst_x0) + * \param dst1 the opposite dst rect coordinate (for example dst_x1) + * \param clipped_dst0 number of pixels to clip from the dst coordinate + * \param clipped_dst1 number of pixels to clip from the opposite dst coordinate + * \param scale the src vs dst scale involved for that coordinate + * \param is_left_or_bottom true if we are clipping the left or bottom sides + * of the rect. + */ +static void +clip_coordinates(bool mirror, + float *src, float *dst0, float *dst1, + float clipped_dst0, + float clipped_dst1, + float scale, + bool is_left_or_bottom) +{ + /* When clipping we need to add or subtract pixels from the original + * coordinates depending on whether we are acting on the left/bottom + * or right/top sides of the rect respectively. We assume we have to + * add them in the code below, and multiply by -1 when we should + * subtract. + */ + int mult = is_left_or_bottom ? 1 : -1; + + if (!mirror) { + *dst0 += clipped_dst0 * mult; + *src += clipped_dst0 * scale * mult; + } else { + *dst1 -= clipped_dst1 * mult; + *src += clipped_dst1 * scale * mult; + } +} + +/** + * Apply a scissor rectangle to blit coordinates. + * + * Returns true if the blit was entirely scissored away. + */ +static bool +apply_blit_scissor(const struct pipe_scissor_state *scissor, + float *src_x0, float *src_y0, + float *src_x1, float *src_y1, + float *dst_x0, float *dst_y0, + float *dst_x1, float *dst_y1, + bool mirror_x, bool mirror_y) +{ + float clip_dst_x0, clip_dst_x1, clip_dst_y0, clip_dst_y1; + + /* Compute number of pixels to scissor away. */ + if (!compute_pixels_clipped(*dst_x0, *dst_y0, *dst_x1, *dst_y1, + scissor->minx, scissor->miny, + scissor->maxx, scissor->maxy, + &clip_dst_x0, &clip_dst_y0, + &clip_dst_x1, &clip_dst_y1)) + return true; + + // XXX: comments assume source clipping, which we don't do + + /* When clipping any of the two rects we need to adjust the coordinates + * in the other rect considering the scaling factor involved. To obtain + * the best precision we want to make sure that we only clip once per + * side to avoid accumulating errors due to the scaling adjustment. + * + * For example, if src_x0 and dst_x0 need both to be clipped we want to + * avoid the situation where we clip src_x0 first, then adjust dst_x0 + * accordingly but then we realize that the resulting dst_x0 still needs + * to be clipped, so we clip dst_x0 and adjust src_x0 again. Because we are + * applying scaling factors to adjust the coordinates in each clipping + * pass we lose some precision and that can affect the results of the + * blorp blit operation slightly. What we want to do here is detect the + * rect that we should clip first for each side so that when we adjust + * the other rect we ensure the resulting coordinate does not need to be + * clipped again. + * + * The code below implements this by comparing the number of pixels that + * we need to clip for each side of both rects considering the scales + * involved. For example, clip_src_x0 represents the number of pixels + * to be clipped for the src rect's left side, so if clip_src_x0 = 5, + * clip_dst_x0 = 4 and scale_x = 2 it means that we are clipping more + * from the dst rect so we should clip dst_x0 only and adjust src_x0. + * This is because clipping 4 pixels in the dst is equivalent to + * clipping 4 * 2 = 8 > 5 in the src. + */ + + if (*src_x0 == *src_x1 || *src_y0 == *src_y1 + || *dst_x0 == *dst_x1 || *dst_y0 == *dst_y1) + return true; + + float scale_x = (float) (*src_x1 - *src_x0) / (*dst_x1 - *dst_x0); + float scale_y = (float) (*src_y1 - *src_y0) / (*dst_y1 - *dst_y0); + + /* Clip left side */ + clip_coordinates(mirror_x, src_x0, dst_x0, dst_x1, + clip_dst_x0, clip_dst_x1, scale_x, true); + + /* Clip right side */ + clip_coordinates(mirror_x, src_x1, dst_x1, dst_x0, + clip_dst_x1, clip_dst_x0, scale_x, false); + + /* Clip bottom side */ + clip_coordinates(mirror_y, src_y0, dst_y0, dst_y1, + clip_dst_y0, clip_dst_y1, scale_y, true); + + /* Clip top side */ + clip_coordinates(mirror_y, src_y1, dst_y1, dst_y0, + clip_dst_y1, clip_dst_y0, scale_y, false); + + /* Check for invalid bounds + * Can't blit for 0-dimensions + */ + return *src_x0 == *src_x1 || *src_y0 == *src_y1 + || *dst_x0 == *dst_x1 || *dst_y0 == *dst_y1; +} + +void +crocus_blorp_surf_for_resource(struct crocus_vtable *vtbl, + struct isl_device *isl_dev, + struct blorp_surf *surf, + struct pipe_resource *p_res, + enum isl_aux_usage aux_usage, + unsigned level, + bool is_render_target) +{ + struct crocus_resource *res = (void *) p_res; + + assert(!crocus_resource_unfinished_aux_import(res)); + + if (isl_aux_usage_has_hiz(aux_usage) && + !crocus_resource_level_has_hiz(res, level)) + aux_usage = ISL_AUX_USAGE_NONE; + + *surf = (struct blorp_surf) { + .surf = &res->surf, + .addr = (struct blorp_address) { + .buffer = res->bo, + .offset = res->offset, + .reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0, + .mocs = crocus_mocs(res->bo, isl_dev), + }, + .aux_usage = aux_usage, + }; + + if (aux_usage != ISL_AUX_USAGE_NONE) { + surf->aux_surf = &res->aux.surf; + surf->aux_addr = (struct blorp_address) { + .buffer = res->aux.bo, + .offset = res->aux.offset, + .reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0, + .mocs = crocus_mocs(res->bo, isl_dev), + }; + surf->clear_color = + crocus_resource_get_clear_color(res); + } +} + +static void +tex_cache_flush_hack(struct crocus_batch *batch, + enum isl_format view_format, + enum isl_format surf_format) +{ + /* The WaSamplerCacheFlushBetweenRedescribedSurfaceReads workaround says: + * + * "Currently Sampler assumes that a surface would not have two + * different format associate with it. It will not properly cache + * the different views in the MT cache, causing a data corruption." + * + * We may need to handle this for texture views in general someday, but + * for now we handle it here, as it hurts copies and blits particularly + * badly because they ofter reinterpret formats. + * + * If the BO hasn't been referenced yet this batch, we assume that the + * texture cache doesn't contain any relevant data nor need flushing. + * + * Icelake (Gen11+) claims to fix this issue, but seems to still have + * issues with ASTC formats. + */ + bool need_flush = view_format != surf_format; + if (!need_flush) + return; + + const char *reason = + "workaround: WaSamplerCacheFlushBetweenRedescribedSurfaceReads"; + + crocus_emit_pipe_control_flush(batch, reason, PIPE_CONTROL_CS_STALL); + crocus_emit_pipe_control_flush(batch, reason, + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); +} + +static struct crocus_resource * +crocus_resource_for_aspect(const struct intel_device_info *devinfo, + struct pipe_resource *p_res, unsigned pipe_mask) +{ + if (pipe_mask == PIPE_MASK_S) { + struct crocus_resource *junk, *s_res; + crocus_get_depth_stencil_resources(devinfo, p_res, &junk, &s_res); + return s_res; + } else { + return (struct crocus_resource *)p_res; + } +} + +static enum pipe_format +pipe_format_for_aspect(enum pipe_format format, unsigned pipe_mask) +{ + if (pipe_mask == PIPE_MASK_S) { + return util_format_stencil_only(format); + } else if (pipe_mask == PIPE_MASK_Z) { + return util_format_get_depth_only(format); + } else { + return format; + } +} + +static void +crocus_u_blitter(struct crocus_context *ice, + const struct pipe_blit_info *info) +{ + struct pipe_blit_info dinfo = *info; + if (!util_format_has_alpha(dinfo.dst.resource->format)) + dinfo.mask &= ~PIPE_MASK_A; + crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable); + util_blitter_blit(ice->blitter, &dinfo); +} + +/** + * The pipe->blit() driver hook. + * + * This performs a blit between two surfaces, which copies data but may + * also perform format conversion, scaling, flipping, and so on. + */ +static void +crocus_blit(struct pipe_context *ctx, const struct pipe_blit_info *info) +{ + struct crocus_context *ice = (void *) ctx; + struct crocus_screen *screen = (struct crocus_screen *)ctx->screen; + const struct intel_device_info *devinfo = &screen->devinfo; + struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER]; + enum blorp_batch_flags blorp_flags = 0; + + /* We don't support color masking. */ + assert((info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA || + (info->mask & PIPE_MASK_RGBA) == 0); + + if (info->render_condition_enable) + if (!crocus_check_conditional_render(ice)) + return; + + if (devinfo->ver <= 5) { + if (!screen->vtbl.blit_blt(batch, info)) { + + if (!util_format_is_depth_or_stencil(info->src.resource->format) && + info->dst.resource->target != PIPE_TEXTURE_3D) + goto use_blorp; + + if (!util_blitter_is_blit_supported(ice->blitter, info)) { + if (util_format_is_depth_or_stencil(info->src.resource->format)) { + + struct pipe_blit_info depth_blit = *info; + depth_blit.mask = PIPE_MASK_Z; + crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable); + util_blitter_blit(ice->blitter, &depth_blit); + + struct pipe_surface *dst_view, dst_templ; + util_blitter_default_dst_texture(&dst_templ, info->dst.resource, info->dst.level, info->dst.box.z); + dst_view = ctx->create_surface(ctx, info->dst.resource, &dst_templ); + + crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable); + + util_blitter_clear_depth_stencil(ice->blitter, dst_view, PIPE_CLEAR_STENCIL, + 0, 0, info->dst.box.x, info->dst.box.y, + info->dst.box.width, info->dst.box.height); + crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable); + util_blitter_stencil_fallback(ice->blitter, + info->dst.resource, + info->dst.level, + &info->dst.box, + info->src.resource, + info->src.level, + &info->src.box, NULL); + + } + return; + } + + crocus_u_blitter(ice, info); + } + return; + } + + if (devinfo->ver == 6) { + if (info->src.resource->target == PIPE_TEXTURE_3D && + info->dst.resource->target == PIPE_TEXTURE_3D) { + crocus_u_blitter(ice, info); + return; + } + } + +use_blorp: + if (info->render_condition_enable) { + if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) + blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE; + } + + float src_x0 = info->src.box.x; + float src_x1 = info->src.box.x + info->src.box.width; + float src_y0 = info->src.box.y; + float src_y1 = info->src.box.y + info->src.box.height; + float dst_x0 = info->dst.box.x; + float dst_x1 = info->dst.box.x + info->dst.box.width; + float dst_y0 = info->dst.box.y; + float dst_y1 = info->dst.box.y + info->dst.box.height; + bool mirror_x = apply_mirror(&src_x0, &src_x1); + bool mirror_y = apply_mirror(&src_y0, &src_y1); + enum blorp_filter filter; + + if (info->scissor_enable) { + bool noop = apply_blit_scissor(&info->scissor, + &src_x0, &src_y0, &src_x1, &src_y1, + &dst_x0, &dst_y0, &dst_x1, &dst_y1, + mirror_x, mirror_y); + if (noop) + return; + } + + if (abs(info->dst.box.width) == abs(info->src.box.width) && + abs(info->dst.box.height) == abs(info->src.box.height)) { + if (info->src.resource->nr_samples > 1 && + info->dst.resource->nr_samples <= 1) { + /* The OpenGL ES 3.2 specification, section 16.2.1, says: + * + * "If the read framebuffer is multisampled (its effective + * value of SAMPLE_BUFFERS is one) and the draw framebuffer + * is not (its value of SAMPLE_BUFFERS is zero), the samples + * corresponding to each pixel location in the source are + * converted to a single sample before being written to the + * destination. The filter parameter is ignored. If the + * source formats are integer types or stencil values, a + * single sample’s value is selected for each pixel. If the + * source formats are floating-point or normalized types, + * the sample values for each pixel are resolved in an + * implementation-dependent manner. If the source formats + * are depth values, sample values are resolved in an + * implementation-dependent manner where the result will be + * between the minimum and maximum depth values in the pixel." + * + * When selecting a single sample, we always choose sample 0. + */ + if (util_format_is_depth_or_stencil(info->src.format) || + util_format_is_pure_integer(info->src.format)) { + filter = BLORP_FILTER_SAMPLE_0; + } else { + filter = BLORP_FILTER_AVERAGE; + } + } else { + /* The OpenGL 4.6 specification, section 18.3.1, says: + * + * "If the source and destination dimensions are identical, + * no filtering is applied." + * + * Using BLORP_FILTER_NONE will also handle the upsample case by + * replicating the one value in the source to all values in the + * destination. + */ + filter = BLORP_FILTER_NONE; + } + } else if (info->filter == PIPE_TEX_FILTER_LINEAR) { + filter = BLORP_FILTER_BILINEAR; + } else { + filter = BLORP_FILTER_NEAREST; + } + + struct blorp_batch blorp_batch; + blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags); + + float src_z_step = (float)info->src.box.depth / (float)info->dst.box.depth; + + /* There is no interpolation to the pixel center during rendering, so + * add the 0.5 offset ourselves here. + */ + float depth_center_offset = 0; + if (info->src.resource->target == PIPE_TEXTURE_3D) + depth_center_offset = 0.5 / info->dst.box.depth * info->src.box.depth; + + /* Perform a blit for each aspect requested by the caller. PIPE_MASK_R is + * used to represent the color aspect. */ + unsigned aspect_mask = info->mask & (PIPE_MASK_R | PIPE_MASK_ZS); + while (aspect_mask) { + unsigned aspect = 1 << u_bit_scan(&aspect_mask); + + struct crocus_resource *src_res = + crocus_resource_for_aspect(devinfo, info->src.resource, aspect); + struct crocus_resource *dst_res = + crocus_resource_for_aspect(devinfo, info->dst.resource, aspect); + + enum pipe_format src_pfmt = + pipe_format_for_aspect(info->src.format, aspect); + enum pipe_format dst_pfmt = + pipe_format_for_aspect(info->dst.format, aspect); + + if (crocus_resource_unfinished_aux_import(src_res)) + crocus_resource_finish_aux_import(ctx->screen, src_res); + if (crocus_resource_unfinished_aux_import(dst_res)) + crocus_resource_finish_aux_import(ctx->screen, dst_res); + + struct crocus_format_info src_fmt = + crocus_format_for_usage(devinfo, src_pfmt, ISL_SURF_USAGE_TEXTURE_BIT); + enum isl_aux_usage src_aux_usage = + crocus_resource_texture_aux_usage(src_res); + + crocus_resource_prepare_texture(ice, src_res, src_fmt.fmt, + info->src.level, 1, info->src.box.z, + info->src.box.depth); + // crocus_emit_buffer_barrier_for(batch, src_res->bo, + // CROCUS_DOMAIN_OTHER_READ); + + struct crocus_format_info dst_fmt = + crocus_format_for_usage(devinfo, dst_pfmt, + ISL_SURF_USAGE_RENDER_TARGET_BIT); + enum isl_aux_usage dst_aux_usage = + crocus_resource_render_aux_usage(ice, dst_res, info->dst.level, + dst_fmt.fmt, false); + + struct blorp_surf src_surf, dst_surf; + crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &src_surf, + &src_res->base, src_aux_usage, + info->src.level, false); + crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &dst_surf, + &dst_res->base, dst_aux_usage, + info->dst.level, true); + + crocus_resource_prepare_render(ice, dst_res, info->dst.level, + info->dst.box.z, info->dst.box.depth, + dst_aux_usage); + // crocus_emit_buffer_barrier_for(batch, dst_res->bo, + // CROCUS_DOMAIN_RENDER_WRITE); + + if (crocus_batch_references(batch, src_res->bo)) + tex_cache_flush_hack(batch, src_fmt.fmt, src_res->surf.format); + + if (dst_res->base.target == PIPE_BUFFER) { + util_range_add(&dst_res->base, &dst_res->valid_buffer_range, + dst_x0, dst_x1); + } + + struct isl_swizzle src_swiz = pipe_to_isl_swizzles(src_fmt.swizzles); + struct isl_swizzle dst_swiz = pipe_to_isl_swizzles(dst_fmt.swizzles); + + for (int slice = 0; slice < info->dst.box.depth; slice++) { + unsigned dst_z = info->dst.box.z + slice; + float src_z = info->src.box.z + slice * src_z_step + + depth_center_offset; + + crocus_batch_maybe_flush(batch, 1500); + + blorp_blit(&blorp_batch, + &src_surf, info->src.level, src_z, + src_fmt.fmt, src_swiz, + &dst_surf, info->dst.level, dst_z, + dst_fmt.fmt, dst_swiz, + src_x0, src_y0, src_x1, src_y1, + dst_x0, dst_y0, dst_x1, dst_y1, + filter, mirror_x, mirror_y); + + } + + tex_cache_flush_hack(batch, src_fmt.fmt, src_res->surf.format); + + crocus_resource_finish_render(ice, dst_res, info->dst.level, + info->dst.box.z, info->dst.box.depth, + dst_aux_usage); + } + + blorp_batch_finish(&blorp_batch); + + crocus_flush_and_dirty_for_history(ice, batch, (struct crocus_resource *) + info->dst.resource, + PIPE_CONTROL_RENDER_TARGET_FLUSH, + "cache history: post-blit"); +} + +static void +get_copy_region_aux_settings(struct crocus_resource *res, + enum isl_aux_usage *out_aux_usage, + bool is_render_target) +{ + switch (res->aux.usage) { + case ISL_AUX_USAGE_MCS: + /* A stencil resolve operation must be performed prior to doing resource + * copies or used by CPU. + * (see HSD 1209978162) + */ + if (is_render_target && isl_surf_usage_is_stencil(res->surf.usage)) { + *out_aux_usage = ISL_AUX_USAGE_NONE; + } else { + *out_aux_usage = res->aux.usage; + } + break; + default: + *out_aux_usage = ISL_AUX_USAGE_NONE; + break; + } +} + +/** + * Perform a GPU-based raw memory copy between compatible view classes. + * + * Does not perform any flushing - the new data may still be left in the + * render cache, and old data may remain in other caches. + * + * Wraps blorp_copy() and blorp_buffer_copy(). + */ +void +crocus_copy_region(struct blorp_context *blorp, + struct crocus_batch *batch, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct blorp_batch blorp_batch; + struct crocus_context *ice = blorp->driver_ctx; + struct crocus_screen *screen = (void *) ice->ctx.screen; + const struct intel_device_info *devinfo = &screen->devinfo; + struct crocus_resource *src_res = (void *) src; + struct crocus_resource *dst_res = (void *) dst; + + if (devinfo->ver <= 5) { + if (screen->vtbl.copy_region_blt(batch, dst_res, + dst_level, dstx, dsty, dstz, + src_res, src_level, src_box)) + return; + } + enum isl_aux_usage src_aux_usage, dst_aux_usage; + get_copy_region_aux_settings(src_res, &src_aux_usage, + false); + get_copy_region_aux_settings(dst_res, &dst_aux_usage, + true); + + if (crocus_batch_references(batch, src_res->bo)) + tex_cache_flush_hack(batch, ISL_FORMAT_UNSUPPORTED, src_res->surf.format); + + if (dst->target == PIPE_BUFFER) + util_range_add(&dst_res->base, &dst_res->valid_buffer_range, dstx, dstx + src_box->width); + + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + struct blorp_address src_addr = { + .buffer = crocus_resource_bo(src), .offset = src_box->x, + }; + struct blorp_address dst_addr = { + .buffer = crocus_resource_bo(dst), .offset = dstx, + .reloc_flags = EXEC_OBJECT_WRITE, + }; + + crocus_batch_maybe_flush(batch, 1500); + + blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0); + blorp_buffer_copy(&blorp_batch, src_addr, dst_addr, src_box->width); + blorp_batch_finish(&blorp_batch); + } else { + // XXX: what about one surface being a buffer and not the other? + + struct blorp_surf src_surf, dst_surf; + crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &src_surf, + src, src_aux_usage, src_level, false); + crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &dst_surf, + dst, dst_aux_usage, dst_level, true); + + crocus_resource_prepare_access(ice, src_res, src_level, 1, + src_box->z, src_box->depth, + src_aux_usage, false); + crocus_resource_prepare_access(ice, dst_res, dst_level, 1, + dstz, src_box->depth, + dst_aux_usage, false); + + blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0); + + for (int slice = 0; slice < src_box->depth; slice++) { + crocus_batch_maybe_flush(batch, 1500); + + blorp_copy(&blorp_batch, &src_surf, src_level, src_box->z + slice, + &dst_surf, dst_level, dstz + slice, + src_box->x, src_box->y, dstx, dsty, + src_box->width, src_box->height); + } + blorp_batch_finish(&blorp_batch); + + crocus_resource_finish_write(ice, dst_res, dst_level, dstz, + src_box->depth, dst_aux_usage); + } + + tex_cache_flush_hack(batch, ISL_FORMAT_UNSUPPORTED, src_res->surf.format); +} + +static struct crocus_batch * +get_preferred_batch(struct crocus_context *ice, struct crocus_bo *bo) +{ + /* If the compute batch is already using this buffer, we'd prefer to + * continue queueing in the compute batch. + */ + if (crocus_batch_references(&ice->batches[CROCUS_BATCH_COMPUTE], bo)) + return &ice->batches[CROCUS_BATCH_COMPUTE]; + + /* Otherwise default to the render batch. */ + return &ice->batches[CROCUS_BATCH_RENDER]; +} + + +/** + * The pipe->resource_copy_region() driver hook. + * + * This implements ARB_copy_image semantics - a raw memory copy between + * compatible view classes. + */ +static void +crocus_resource_copy_region(struct pipe_context *ctx, + struct pipe_resource *p_dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *p_src, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct crocus_context *ice = (void *) ctx; + struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER]; + struct crocus_screen *screen = (struct crocus_screen *)ctx->screen; + const struct intel_device_info *devinfo = &screen->devinfo; + struct crocus_resource *src = (void *) p_src; + struct crocus_resource *dst = (void *) p_dst; + + if (crocus_resource_unfinished_aux_import(src)) + crocus_resource_finish_aux_import(ctx->screen, src); + if (crocus_resource_unfinished_aux_import(dst)) + crocus_resource_finish_aux_import(ctx->screen, dst); + + /* Use MI_COPY_MEM_MEM for tiny (<= 16 byte, % 4) buffer copies. */ + if (p_src->target == PIPE_BUFFER && p_dst->target == PIPE_BUFFER && + (src_box->width % 4 == 0) && src_box->width <= 16 && + screen->vtbl.copy_mem_mem) { + struct crocus_bo *dst_bo = crocus_resource_bo(p_dst); + batch = get_preferred_batch(ice, dst_bo); + crocus_batch_maybe_flush(batch, 24 + 5 * (src_box->width / 4)); + crocus_emit_pipe_control_flush(batch, + "stall for MI_COPY_MEM_MEM copy_region", + PIPE_CONTROL_CS_STALL); + screen->vtbl.copy_mem_mem(batch, dst_bo, dstx, crocus_resource_bo(p_src), + src_box->x, src_box->width); + return; + } + + if (devinfo->ver < 6 && util_format_is_depth_or_stencil(p_dst->format)) { + util_resource_copy_region(ctx, p_dst, dst_level, dstx, dsty, dstz, + p_src, src_level, src_box); + return; + } + crocus_copy_region(&ice->blorp, batch, p_dst, dst_level, dstx, dsty, dstz, + p_src, src_level, src_box); + + if (util_format_is_depth_and_stencil(p_dst->format) && + util_format_has_stencil(util_format_description(p_src->format)) && + devinfo->ver >= 6) { + struct crocus_resource *junk, *s_src_res, *s_dst_res; + crocus_get_depth_stencil_resources(devinfo, p_src, &junk, &s_src_res); + crocus_get_depth_stencil_resources(devinfo, p_dst, &junk, &s_dst_res); + + crocus_copy_region(&ice->blorp, batch, &s_dst_res->base, dst_level, dstx, + dsty, dstz, &s_src_res->base, src_level, src_box); + } + + crocus_flush_and_dirty_for_history(ice, batch, dst, + PIPE_CONTROL_RENDER_TARGET_FLUSH, + "cache history: post copy_region"); +} + +void +crocus_init_blit_functions(struct pipe_context *ctx) +{ + ctx->blit = crocus_blit; + ctx->resource_copy_region = crocus_resource_copy_region; +} |