diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_migrate.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_migrate.c | 282 |
1 file changed, 269 insertions, 13 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 05262f1b438e..6555cc92dc48 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -497,9 +497,123 @@ static bool wa_1209644611_applies(int ver, u32 size) * location. */ -static int emit_copy(struct i915_request *rq, - u32 dst_offset, u32 src_offset, int size) +static inline u32 *i915_flush_dw(u32 *cmd, u32 flags) { + *cmd++ = MI_FLUSH_DW | flags; + *cmd++ = 0; + *cmd++ = 0; + + return cmd; +} + +static u32 calc_ctrl_surf_instr_size(struct drm_i915_private *i915, int size) +{ + u32 num_cmds, num_blks, total_size; + + if (!GET_CCS_BYTES(i915, size)) + return 0; + + /* + * XY_CTRL_SURF_COPY_BLT transfers CCS in 256 byte + * blocks. One XY_CTRL_SURF_COPY_BLT command can + * transfer up to 1024 blocks. + */ + num_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size), + NUM_CCS_BYTES_PER_BLOCK); + num_cmds = DIV_ROUND_UP(num_blks, NUM_CCS_BLKS_PER_XFER); + total_size = XY_CTRL_SURF_INSTR_SIZE * num_cmds; + + /* + * Adding a flush before and after XY_CTRL_SURF_COPY_BLT + */ + total_size += 2 * MI_FLUSH_DW_SIZE; + + return total_size; +} + +static u32 *_i915_ctrl_surf_copy_blt(u32 *cmd, u64 src_addr, u64 dst_addr, + u8 src_mem_access, u8 dst_mem_access, + int src_mocs, int dst_mocs, + u32 ccs_blocks) +{ + /* + * The XY_CTRL_SURF_COPY_BLT instruction is used to copy the CCS + * data in and out of the CCS region. + * + * We can copy at most 1024 blocks of 256 bytes using one + * XY_CTRL_SURF_COPY_BLT instruction. + * + * In case we need to copy more than 1024 blocks, we need to add + * another instruction to the same batch buffer. + * + * 1024 blocks of 256 bytes of CCS represent a total 256KB of CCS. + * + * 256 KB of CCS represents 256 * 256 KB = 64 MB of LMEM. + */ + do { + int blks_per_copy; + + blks_per_copy = ccs_blocks >= NUM_CCS_BLKS_PER_XFER ? 
+ NUM_CCS_BLKS_PER_XFER : ccs_blocks; + *cmd++ = XY_CTRL_SURF_COPY_BLT | + src_mem_access << SRC_ACCESS_TYPE_SHIFT | + dst_mem_access << DST_ACCESS_TYPE_SHIFT | + FIELD_PREP(CCS_SIZE_MASK, blks_per_copy - 1); + *cmd++ = lower_32_bits(src_addr); + *cmd++ = (upper_32_bits(src_addr) & 0xFFFF) | + FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, src_mocs); + *cmd++ = lower_32_bits(dst_addr); + *cmd++ = (upper_32_bits(dst_addr) & 0xFFFF) | + FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, dst_mocs); + src_addr += SZ_64M; + dst_addr += SZ_64M; + ccs_blocks -= blks_per_copy; + } while (ccs_blocks > 0); + + return cmd; +} + +static int emit_ccs_copy(struct i915_request *rq, + bool dst_is_lmem, u32 dst_offset, + bool src_is_lmem, u32 src_offset, int size) +{ + struct drm_i915_private *i915 = rq->engine->i915; + int mocs = rq->engine->gt->mocs.uc_index << 1; + u32 num_ccs_blks, ccs_ring_size; + u8 src_access, dst_access; + u32 *cs; + + GEM_BUG_ON(!(src_is_lmem ^ dst_is_lmem) || !HAS_FLAT_CCS(i915)); + + ccs_ring_size = calc_ctrl_surf_instr_size(i915, size); + WARN_ON(!ccs_ring_size); + + cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2)); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + num_ccs_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size), + NUM_CCS_BYTES_PER_BLOCK); + + src_access = !src_is_lmem && dst_is_lmem; + dst_access = !src_access; + + cs = i915_flush_dw(cs, MI_FLUSH_LLC | MI_FLUSH_CCS); + cs = _i915_ctrl_surf_copy_blt(cs, src_offset, dst_offset, + src_access, dst_access, + mocs, mocs, num_ccs_blks); + cs = i915_flush_dw(cs, MI_FLUSH_LLC | MI_FLUSH_CCS); + if (ccs_ring_size & 1) + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + return 0; +} + +static int emit_main_copy(struct i915_request *rq, + bool dst_is_lmem, u32 dst_offset, + bool src_is_lmem, u32 src_offset, int size) { const int ver = GRAPHICS_VER(rq->engine->i915); u32 instance = rq->engine->instance; u32 *cs; @@ -544,6 +658,91 @@ static int emit_copy(struct i915_request *rq, return 0; } +static int scatter_list_length(struct 
scatterlist *sg) +{ + int len = 0; + + while (sg && sg_dma_len(sg)) { + len += sg_dma_len(sg); + sg = sg_next(sg); + }; + + return len; +} + +static void +calculate_src_dst_sz(struct drm_i915_private *i915, bool src_is_lmem, + u32 *src_sz, u32 *dst_sz, u32 bytes_to_cpy, + bool ccs_copy, u32 ccs_bytes) +{ + u32 ccs_sz, smem_sz; + + if (ccs_copy) { + /* + * We can only copy the ccs data corresponding to + * the CHUNK_SZ of lmem which is + * GET_CCS_BYTES(i915, CHUNK_SZ)) + */ + ccs_sz = min_t(int, bytes_to_cpy, GET_CCS_BYTES(i915, CHUNK_SZ)); + + /* Flat-CCS: CCS data copy */ + if (!src_is_lmem) { /* src is smem */ + *src_sz = ccs_sz; + *dst_sz = CHUNK_SZ; + } else { + *src_sz = CHUNK_SZ; + *dst_sz = ccs_sz; + } + } else if (!ccs_copy && ccs_bytes) { + /* + * When CHUNK_SZ is passed all the pages up to CHUNK_SZ will + * be taken for the blt. In Flat-CCS supported platform Smem + * obj will have more pages than required for main memory + * hence limit it to the required size for main memory + */ + smem_sz = min_t(int, bytes_to_cpy, CHUNK_SZ); + /* Flat-CCS: Main memory copy */ + if (!src_is_lmem) { + *src_sz = smem_sz; + *dst_sz = CHUNK_SZ; + } else { + *dst_sz = smem_sz; + *src_sz = CHUNK_SZ; + } + } else { /* ccs handling is not required */ + *src_sz = CHUNK_SZ; + } +} + +static int emit_copy(struct i915_request *rq, bool dst_is_lmem, u32 dst_offset, + bool src_is_lmem, u32 src_offset, u32 src_sz, u32 dst_sz, + int *size, bool ccs_copy) +{ + int err; + + if (ccs_copy) { + /* + * Using max of src_sz and dst_sz, as we need to + * pass the lmem size corresponding to the ccs + * blocks we need to handle. 
+ */ + *size = max_t(int, src_sz, dst_sz); + err = emit_ccs_copy(rq, dst_is_lmem, dst_offset, + src_is_lmem, src_offset, + *size); + + /* Converting back to ccs bytes */ + *size = GET_CCS_BYTES(rq->engine->i915, *size); + } else { + WARN(src_sz != dst_sz, "%d != %d", src_sz, dst_sz); + *size = src_sz; + err = emit_main_copy(rq, dst_is_lmem, dst_offset, + src_is_lmem, src_offset, *size); + } + + return err; +} + int intel_context_migrate_copy(struct intel_context *ce, const struct i915_deps *deps, @@ -556,7 +755,10 @@ intel_context_migrate_copy(struct intel_context *ce, struct i915_request **out) { struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst); + struct drm_i915_private *i915 = ce->engine->i915; + u32 src_sz, dst_sz, ccs_bytes = 0, bytes_to_cpy; struct i915_request *rq; + bool ccs_copy = false; int err; GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); @@ -564,9 +766,28 @@ intel_context_migrate_copy(struct intel_context *ce, GEM_BUG_ON(ce->ring->size < SZ_64K); + if (HAS_FLAT_CCS(i915) && src_is_lmem ^ dst_is_lmem) { + src_sz = scatter_list_length(src); + dst_sz = scatter_list_length(dst); + + if (src_is_lmem) + bytes_to_cpy = src_sz; + else if (dst_is_lmem) + bytes_to_cpy = dst_sz; + + /* + * When there is an eviction of ccs needed smem will have the + * extra pages for the ccs data + * + * TO-DO: Want to move the size mismatch check to a WARN_ON, + * but still we have some requests of smem->lmem with same size. + * Need to fix it. + */ + ccs_bytes = src_sz != dst_sz ? 
GET_CCS_BYTES(i915, bytes_to_cpy) : 0; + } + do { - u32 src_offset, dst_offset; - int len; + u32 src_offset, dst_offset, copy_sz; rq = i915_request_create(ce); if (IS_ERR(rq)) { @@ -606,27 +827,38 @@ intel_context_migrate_copy(struct intel_context *ce, dst_offset = 2 * CHUNK_SZ; } - len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, - src_offset, CHUNK_SZ); - if (len <= 0) { - err = len; + calculate_src_dst_sz(i915, src_is_lmem, &src_sz, &dst_sz, + bytes_to_cpy, ccs_copy, ccs_bytes); + + src_sz = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, + src_offset, src_sz); + if (src_sz <= 0) { + err = src_sz; goto out_rq; } + if (!ccs_bytes) + dst_sz = src_sz; + err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem, - dst_offset, len); + dst_offset, dst_sz); if (err < 0) goto out_rq; - if (err < len) { + if (err < dst_sz && !ccs_bytes) { err = -EINVAL; goto out_rq; } + dst_sz = err; + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); if (err) goto out_rq; - err = emit_copy(rq, dst_offset, src_offset, len); + err = emit_copy(rq, dst_is_lmem, dst_offset, src_is_lmem, + src_offset, src_sz, dst_sz, &copy_sz, ccs_copy); + if (!err && ccs_bytes) + bytes_to_cpy -= copy_sz; /* Arbitration is re-enabled between requests. */ out_rq: @@ -634,9 +866,33 @@ out_rq: i915_request_put(*out); *out = i915_request_get(rq); i915_request_add(rq); - if (err || !it_src.sg || !sg_dma_len(it_src.sg)) - break; + if (err || !it_src.sg || !sg_dma_len(it_src.sg) || + !it_dst.sg || !sg_dma_len(it_src.sg)) { + if (err || !ccs_bytes) + break; + + GEM_BUG_ON(bytes_to_cpy); + if (ccs_copy) { + break; + } else if (ccs_bytes) { + if (src_is_lmem) { + WARN_ON(it_src.sg && sg_dma_len(it_src.sg)); + it_src = sg_sgt(src); + } else { + WARN_ON(it_dst.sg && sg_dma_len(it_dst.sg)); + it_dst = sg_sgt(dst); + } + bytes_to_cpy = ccs_bytes; + ccs_copy = true; + + continue; + } else { + DRM_ERROR("Invalid state\n"); + err = -EINVAL; + break; + } + } cond_resched(); } while (1); |