diff options
author | Zbigniew Kempczyński <zbigniew.kempczynski@intel.com> | 2022-12-15 12:35:18 +0100 |
---|---|---|
committer | Zbigniew Kempczyński <zbigniew.kempczynski@intel.com> | 2022-12-19 11:10:28 +0100 |
commit | 3259f6e450a8ef6eea727927d4435a52236f251b (patch) | |
tree | 45a033c59246b0ab01c4cd0b1a68d34e40dc54b6 | |
parent | a1b197d9d0bcb8d5b5ccf3810a1e70e726cab418 (diff) |
lib/i915_blt: Extract blit emit functions
Add some flexibility in building user pipelines by extracting the blitter
emission code into dedicated functions. The previous blitter functions,
which each do a single blit-and-execute, are rewritten to use those functions.
Requires usage with a stateful allocator (an offset might be acquired more
than once, so it must not change).
Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
Cc: Karolina Stolarek <karolina.stolarek@intel.com>
Reviewed-by: Karolina Stolarek <karolina.stolarek@intel.com>
-rw-r--r-- | lib/i915/i915_blt.c | 273 | ||||
-rw-r--r-- | lib/i915/i915_blt.h | 19 |
2 files changed, 219 insertions, 73 deletions
diff --git a/lib/i915/i915_blt.c b/lib/i915/i915_blt.c index 42c28623f..541935656 100644 --- a/lib/i915/i915_blt.c +++ b/lib/i915/i915_blt.c @@ -503,64 +503,68 @@ static void dump_bb_ext(struct gen12_block_copy_data_ext *data) } /** - * blt_block_copy: + * emit_blt_block_copy: * @i915: drm fd - * @ctx: intel_ctx_t context - * @e: blitter engine for @ctx * @ahnd: allocator handle * @blt: basic blitter data (for TGL/DG1 which doesn't support ext version) * @ext: extended blitter data (for DG2+, supports flatccs compression) + * @bb_pos: position at which insert block copy commands + * @emit_bbe: emit MI_BATCH_BUFFER_END after block-copy or not * - * Function does blit between @src and @dst described in @blt object. + * Function inserts block-copy blit into batch at @bb_pos. Allows concatenating + * with other commands to achieve pipelining. * * Returns: - * execbuffer status. + * Next write position in batch. */ -int blt_block_copy(int i915, - const intel_ctx_t *ctx, - const struct intel_execution_engine2 *e, - uint64_t ahnd, - const struct blt_copy_data *blt, - const struct blt_block_copy_data_ext *ext) +uint64_t emit_blt_block_copy(int i915, + uint64_t ahnd, + const struct blt_copy_data *blt, + const struct blt_block_copy_data_ext *ext, + uint64_t bb_pos, + bool emit_bbe) { - struct drm_i915_gem_execbuffer2 execbuf = {}; - struct drm_i915_gem_exec_object2 obj[3] = {}; struct gen12_block_copy_data data = {}; struct gen12_block_copy_data_ext dext = {}; uint64_t dst_offset, src_offset, bb_offset, alignment; - uint32_t *bb; - int i, ret; + uint32_t bbe = MI_BATCH_BUFFER_END; + uint8_t *bb; igt_assert_f(ahnd, "block-copy supports softpin only\n"); igt_assert_f(blt, "block-copy requires data to do blit\n"); alignment = gem_detect_safe_alignment(i915); src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, alignment); - if (__special_mode(blt) == SM_FULL_RESOLVE) - dst_offset = src_offset; - else - dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, 
alignment); + dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, alignment); bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, alignment); fill_data(&data, blt, src_offset, dst_offset, ext); - i = sizeof(data) / sizeof(uint32_t); bb = gem_mmap__device_coherent(i915, blt->bb.handle, 0, blt->bb.size, PROT_READ | PROT_WRITE); - memcpy(bb, &data, sizeof(data)); + + igt_assert(bb_pos + sizeof(data) < blt->bb.size); + memcpy(bb + bb_pos, &data, sizeof(data)); + bb_pos += sizeof(data); if (ext) { fill_data_ext(&dext, ext); - memcpy(bb + i, &dext, sizeof(dext)); - i += sizeof(dext) / sizeof(uint32_t); + igt_assert(bb_pos + sizeof(dext) < blt->bb.size); + memcpy(bb + bb_pos, &dext, sizeof(dext)); + bb_pos += sizeof(dext); + } + + if (emit_bbe) { + igt_assert(bb_pos + sizeof(uint32_t) < blt->bb.size); + memcpy(bb + bb_pos, &bbe, sizeof(bbe)); + bb_pos += sizeof(uint32_t); } - bb[i++] = MI_BATCH_BUFFER_END; if (blt->print_bb) { igt_info("[BLOCK COPY]\n"); - igt_info("src offset: %llx, dst offset: %llx, bb offset: %llx\n", - (long long) src_offset, (long long) dst_offset, - (long long) bb_offset); + igt_info("src offset: %" PRIx64 ", dst offset: %" PRIx64 + ", bb offset: %" PRIx64 "\n", + src_offset, dst_offset, bb_offset); dump_bb_cmd(&data); if (ext) @@ -569,6 +573,45 @@ int blt_block_copy(int i915, munmap(bb, blt->bb.size); + return bb_pos; +} + +/** + * blt_block_copy: + * @i915: drm fd + * @ctx: intel_ctx_t context + * @e: blitter engine for @ctx + * @ahnd: allocator handle + * @blt: basic blitter data (for TGL/DG1 which doesn't support ext version) + * @ext: extended blitter data (for DG2+, supports flatccs compression) + * + * Function does blit between @src and @dst described in @blt object. + * + * Returns: + * execbuffer status. 
+ */ +int blt_block_copy(int i915, + const intel_ctx_t *ctx, + const struct intel_execution_engine2 *e, + uint64_t ahnd, + const struct blt_copy_data *blt, + const struct blt_block_copy_data_ext *ext) +{ + struct drm_i915_gem_execbuffer2 execbuf = {}; + struct drm_i915_gem_exec_object2 obj[3] = {}; + uint64_t dst_offset, src_offset, bb_offset, alignment; + int ret; + + igt_assert_f(ahnd, "block-copy supports softpin only\n"); + igt_assert_f(blt, "block-copy requires data to do blit\n"); + + alignment = gem_detect_safe_alignment(i915); + src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, alignment); + dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, alignment); + bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, alignment); + + emit_blt_block_copy(i915, ahnd, blt, ext, 0, true); + obj[0].offset = CANONICAL(dst_offset); obj[1].offset = CANONICAL(src_offset); obj[2].offset = CANONICAL(bb_offset); @@ -655,31 +698,30 @@ static void dump_bb_surf_ctrl_cmd(const struct gen12_ctrl_surf_copy_data *data) } /** - * blt_ctrl_surf_copy: + * emit_blt_ctrl_surf_copy: * @i915: drm fd - * @ctx: intel_ctx_t context - * @e: blitter engine for @ctx * @ahnd: allocator handle * @surf: blitter data for ctrl-surf-copy + * @bb_pos: position at which insert block copy commands + * @emit_bbe: emit MI_BATCH_BUFFER_END after ctrl-surf-copy or not * - * Function does ctrl-surf-copy blit between @src and @dst described in - * @blt object. + * Function emits ctrl-surf-copy blit between @src and @dst described in + * @blt object at @bb_pos. Allows concatenating with other commands to + * achieve pipelining. * * Returns: - * execbuffer status. + * Next write position in batch. 
*/ -int blt_ctrl_surf_copy(int i915, - const intel_ctx_t *ctx, - const struct intel_execution_engine2 *e, - uint64_t ahnd, - const struct blt_ctrl_surf_copy_data *surf) +uint64_t emit_blt_ctrl_surf_copy(int i915, + uint64_t ahnd, + const struct blt_ctrl_surf_copy_data *surf, + uint64_t bb_pos, + bool emit_bbe) { - struct drm_i915_gem_execbuffer2 execbuf = {}; - struct drm_i915_gem_exec_object2 obj[3] = {}; struct gen12_ctrl_surf_copy_data data = {}; uint64_t dst_offset, src_offset, bb_offset, alignment; + uint32_t bbe = MI_BATCH_BUFFER_END; uint32_t *bb; - int i; igt_assert_f(ahnd, "ctrl-surf-copy supports softpin only\n"); igt_assert_f(surf, "ctrl-surf-copy requires data to do ctrl-surf-copy blit\n"); @@ -695,12 +737,9 @@ int blt_ctrl_surf_copy(int i915, data.dw00.size_of_ctrl_copy = __ccs_size(surf) / CCS_RATIO - 1; data.dw00.length = 0x3; - src_offset = get_offset(ahnd, surf->src.handle, surf->src.size, - alignment); - dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size, - alignment); - bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size, - alignment); + src_offset = get_offset(ahnd, surf->src.handle, surf->src.size, alignment); + dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size, alignment); + bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size, alignment); data.dw01.src_address_lo = src_offset; data.dw02.src_address_hi = src_offset >> 32; @@ -710,22 +749,67 @@ int blt_ctrl_surf_copy(int i915, data.dw04.dst_address_hi = dst_offset >> 32; data.dw04.dst_mocs = surf->dst.mocs; - i = sizeof(data) / sizeof(uint32_t); bb = gem_mmap__device_coherent(i915, surf->bb.handle, 0, surf->bb.size, PROT_READ | PROT_WRITE); - memcpy(bb, &data, sizeof(data)); - bb[i++] = MI_BATCH_BUFFER_END; + + igt_assert(bb_pos + sizeof(data) < surf->bb.size); + memcpy(bb + bb_pos, &data, sizeof(data)); + bb_pos += sizeof(data); + + if (emit_bbe) { + igt_assert(bb_pos + sizeof(uint32_t) < surf->bb.size); + memcpy(bb + bb_pos, &bbe, sizeof(bbe)); + bb_pos 
+= sizeof(uint32_t); + } if (surf->print_bb) { - igt_info("BB [CTRL SURF]:\n"); - igt_info("src offset: %llx, dst offset: %llx, bb offset: %llx\n", - (long long) src_offset, (long long) dst_offset, - (long long) bb_offset); + igt_info("[CTRL SURF]:\n"); + igt_info("src offset: %" PRIx64 ", dst offset: %" PRIx64 + ", bb offset: %" PRIx64 "\n", + src_offset, dst_offset, bb_offset); dump_bb_surf_ctrl_cmd(&data); } + munmap(bb, surf->bb.size); + return bb_pos; +} + +/** + * blt_ctrl_surf_copy: + * @i915: drm fd + * @ctx: intel_ctx_t context + * @e: blitter engine for @ctx + * @ahnd: allocator handle + * @surf: blitter data for ctrl-surf-copy + * + * Function does ctrl-surf-copy blit between @src and @dst described in + * @blt object. + * + * Returns: + * execbuffer status. + */ +int blt_ctrl_surf_copy(int i915, + const intel_ctx_t *ctx, + const struct intel_execution_engine2 *e, + uint64_t ahnd, + const struct blt_ctrl_surf_copy_data *surf) +{ + struct drm_i915_gem_execbuffer2 execbuf = {}; + struct drm_i915_gem_exec_object2 obj[3] = {}; + uint64_t dst_offset, src_offset, bb_offset, alignment; + + igt_assert_f(ahnd, "ctrl-surf-copy supports softpin only\n"); + igt_assert_f(surf, "ctrl-surf-copy requires data to do ctrl-surf-copy blit\n"); + + alignment = max_t(uint64_t, gem_detect_safe_alignment(i915), 1ull << 16); + src_offset = get_offset(ahnd, surf->src.handle, surf->src.size, alignment); + dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size, alignment); + bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size, alignment); + + emit_blt_ctrl_surf_copy(i915, ahnd, surf, 0, true); + obj[0].offset = CANONICAL(dst_offset); obj[1].offset = CANONICAL(src_offset); obj[2].offset = CANONICAL(bb_offset); @@ -869,31 +953,31 @@ static void dump_bb_fast_cmd(struct gen12_fast_copy_data *data) } /** - * blt_fast_copy: + * emit_blt_fast_copy: * @i915: drm fd - * @ctx: intel_ctx_t context - * @e: blitter engine for @ctx * @ahnd: allocator handle * @blt: blitter data 
for fast-copy (same as for block-copy but doesn't use * compression fields). + * @bb_pos: position at which insert block copy commands + * @emit_bbe: emit MI_BATCH_BUFFER_END after fast-copy or not * - * Function does fast blit between @src and @dst described in @blt object. + * Function emits fast-copy blit between @src and @dst described in @blt object + * at @bb_pos. Allows concatenating with other commands to + * achieve pipelining. * * Returns: - * execbuffer status. + * Next write position in batch. */ -int blt_fast_copy(int i915, - const intel_ctx_t *ctx, - const struct intel_execution_engine2 *e, - uint64_t ahnd, - const struct blt_copy_data *blt) +uint64_t emit_blt_fast_copy(int i915, + uint64_t ahnd, + const struct blt_copy_data *blt, + uint64_t bb_pos, + bool emit_bbe) { - struct drm_i915_gem_execbuffer2 execbuf = {}; - struct drm_i915_gem_exec_object2 obj[3] = {}; struct gen12_fast_copy_data data = {}; uint64_t dst_offset, src_offset, bb_offset, alignment; + uint32_t bbe = MI_BATCH_BUFFER_END; uint32_t *bb; - int i, ret; alignment = gem_detect_safe_alignment(i915); @@ -931,22 +1015,65 @@ int blt_fast_copy(int i915, data.dw08.src_address_lo = src_offset; data.dw09.src_address_hi = src_offset >> 32; - i = sizeof(data) / sizeof(uint32_t); bb = gem_mmap__device_coherent(i915, blt->bb.handle, 0, blt->bb.size, PROT_READ | PROT_WRITE); - memcpy(bb, &data, sizeof(data)); - bb[i++] = MI_BATCH_BUFFER_END; + igt_assert(bb_pos + sizeof(data) < blt->bb.size); + memcpy(bb + bb_pos, &data, sizeof(data)); + bb_pos += sizeof(data); + + if (emit_bbe) { + igt_assert(bb_pos + sizeof(uint32_t) < blt->bb.size); + memcpy(bb + bb_pos, &bbe, sizeof(bbe)); + bb_pos += sizeof(uint32_t); + } if (blt->print_bb) { - igt_info("BB [FAST COPY]\n"); - igt_info("blit [src offset: %llx, dst offset: %llx\n", - (long long) src_offset, (long long) dst_offset); + igt_info("[FAST COPY]\n"); + igt_info("src offset: %" PRIx64 ", dst offset: %" PRIx64 + ", bb offset: %" PRIx64 "\n", + src_offset, 
dst_offset, bb_offset); dump_bb_fast_cmd(&data); } munmap(bb, blt->bb.size); + return bb_pos; +} + +/** + * blt_fast_copy: + * @i915: drm fd + * @ctx: intel_ctx_t context + * @e: blitter engine for @ctx + * @ahnd: allocator handle + * @blt: blitter data for fast-copy (same as for block-copy but doesn't use + * compression fields). + * + * Function does fast blit between @src and @dst described in @blt object. + * + * Returns: + * execbuffer status. + */ +int blt_fast_copy(int i915, + const intel_ctx_t *ctx, + const struct intel_execution_engine2 *e, + uint64_t ahnd, + const struct blt_copy_data *blt) +{ + struct drm_i915_gem_execbuffer2 execbuf = {}; + struct drm_i915_gem_exec_object2 obj[3] = {}; + uint64_t dst_offset, src_offset, bb_offset, alignment; + int ret; + + alignment = gem_detect_safe_alignment(i915); + + src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, alignment); + dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, alignment); + bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, alignment); + + emit_blt_fast_copy(i915, ahnd, blt, 0, true); + obj[0].offset = CANONICAL(dst_offset); obj[1].offset = CANONICAL(src_offset); obj[2].offset = CANONICAL(bb_offset); diff --git a/lib/i915/i915_blt.h b/lib/i915/i915_blt.h index e0e8b52bc..34db9bb96 100644 --- a/lib/i915/i915_blt.h +++ b/lib/i915/i915_blt.h @@ -168,6 +168,13 @@ bool blt_supports_compression(int i915); bool blt_supports_tiling(int i915, enum blt_tiling tiling); const char *blt_tiling_name(enum blt_tiling tiling); +uint64_t emit_blt_block_copy(int i915, + uint64_t ahnd, + const struct blt_copy_data *blt, + const struct blt_block_copy_data_ext *ext, + uint64_t bb_pos, + bool emit_bbe); + int blt_block_copy(int i915, const intel_ctx_t *ctx, const struct intel_execution_engine2 *e, @@ -175,12 +182,24 @@ int blt_block_copy(int i915, const struct blt_copy_data *blt, const struct blt_block_copy_data_ext *ext); +uint64_t emit_blt_ctrl_surf_copy(int i915, + uint64_t ahnd, + 
const struct blt_ctrl_surf_copy_data *surf, + uint64_t bb_pos, + bool emit_bbe); + int blt_ctrl_surf_copy(int i915, const intel_ctx_t *ctx, const struct intel_execution_engine2 *e, uint64_t ahnd, const struct blt_ctrl_surf_copy_data *surf); +uint64_t emit_blt_fast_copy(int i915, + uint64_t ahnd, + const struct blt_copy_data *blt, + uint64_t bb_pos, + bool emit_bbe); + int blt_fast_copy(int i915, const intel_ctx_t *ctx, const struct intel_execution_engine2 *e, |