From a55d2659d9a845f6feded6430b62365c59b012e2 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Wed, 3 May 2017 09:40:44 -0400 Subject: radeon/vcn: add decode message for hevc codec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Leo Liu Reviewed-by: Christian König --- src/gallium/drivers/radeon/radeon_vcn_dec.c | 225 ++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c b/src/gallium/drivers/radeon/radeon_vcn_dec.c index c99e77013c0..447a2009896 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_dec.c +++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c @@ -180,6 +180,212 @@ static void radeon_dec_destroy_associated_data(void *data) /* NOOP, since we only use an intptr */ } +static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec, + struct pipe_video_buffer *target, + struct pipe_h265_picture_desc *pic) +{ + rvcn_dec_message_hevc_t result; + unsigned i; + + memset(&result, 0, sizeof(result)); + result.sps_info_flags = 0; + result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0; + result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1; + result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2; + result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3; + result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4; + result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5; + result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; + result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; + result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; + if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO) + result.sps_info_flags |= 1 << 9; + if (pic->UseRefPicList == true) + result.sps_info_flags |= 1 << 10; + + result.chroma_format = pic->pps->sps->chroma_format_idc; + result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8; + result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8; + result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; + result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1; + result.log2_min_luma_coding_block_size_minus3 = + pic->pps->sps->log2_min_luma_coding_block_size_minus3; + result.log2_diff_max_min_luma_coding_block_size = + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + result.log2_min_transform_block_size_minus2 = + pic->pps->sps->log2_min_transform_block_size_minus2; + result.log2_diff_max_min_transform_block_size = + pic->pps->sps->log2_diff_max_min_transform_block_size; + result.max_transform_hierarchy_depth_inter = + pic->pps->sps->max_transform_hierarchy_depth_inter; + result.max_transform_hierarchy_depth_intra = + pic->pps->sps->max_transform_hierarchy_depth_intra; + result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1; + result.pcm_sample_bit_depth_chroma_minus1 = + pic->pps->sps->pcm_sample_bit_depth_chroma_minus1; + result.log2_min_pcm_luma_coding_block_size_minus3 = + pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3; + result.log2_diff_max_min_pcm_luma_coding_block_size = + pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size; + result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets; + + result.pps_info_flags = 0; + result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0; + result.pps_info_flags |= pic->pps->output_flag_present_flag << 1; + result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2; + result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3; + result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4; + result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5; + result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6; + result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7; + result.pps_info_flags |= pic->pps->weighted_pred_flag << 8; + result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9; + result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10; + result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11; + result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12; + result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13; + result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14; + result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15; + result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16; + result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17; + result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18; + result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19; + + result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits; + result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps; + result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1; + result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1; + result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset; + result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset; + result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2; + result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2; + result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth; + result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1; + result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1; + result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2; + result.init_qp_minus26 = pic->pps->init_qp_minus26; + + for (i = 0; i < 19; ++i) + result.column_width_minus1[i] = pic->pps->column_width_minus1[i]; + + for (i = 0; i < 21; ++i) + result.row_height_minus1[i] = pic->pps->row_height_minus1[i]; + + result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx; + result.curr_idx = pic->CurrPicOrderCntVal; + result.curr_poc = pic->CurrPicOrderCntVal; + + vl_video_buffer_set_associated_data(target, &dec->base, + (void *)(uintptr_t)pic->CurrPicOrderCntVal, + &radeon_dec_destroy_associated_data); + + for (i = 0; i < 16; ++i) { + struct pipe_video_buffer *ref = pic->ref[i]; + uintptr_t ref_pic = 0; + + result.poc_list[i] = pic->PicOrderCntVal[i]; + + if (ref) + ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); + else + ref_pic = 0x7F; + result.ref_pic_list[i] = ref_pic; + } + + for (i = 0; i < 8; ++i) { + result.ref_pic_set_st_curr_before[i] = 0xFF; + result.ref_pic_set_st_curr_after[i] = 0xFF; + result.ref_pic_set_lt_curr[i] = 0xFF; + } + + for (i = 0; i < pic->NumPocStCurrBefore; ++i) + result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i]; + + for (i = 0; i < pic->NumPocStCurrAfter; ++i) + result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i]; + + for (i = 0; i < pic->NumPocLtCurr; ++i) + result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i]; + + for (i = 0; i < 6; ++i) + result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i]; + + for (i = 0; i < 2; ++i) + result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i]; + + memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); + memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); + memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); + memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); + + for (i = 0 ; i < 2 ; i++) { + for (int j = 0 ; j < 15 ; j++) + result.direct_reflist[i][j] = pic->RefPicList[i][j]; + } + + if ((pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) && + (target->buffer_format == PIPE_FORMAT_NV12)) { + result.p010_mode = 0; + result.luma_10to8 = 5; + result.chroma_10to8 = 5; + result.hevc_reserved[0] = 4; /* sclr_luma10to8 */ + result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */ + } + + return result; +} + +static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec) +{ + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + + unsigned max_references = dec->base.max_references + 1; + + if (dec->base.width * dec->base.height >= 4096*2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + width = align (width, 16); + height = align (height, 16); + return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024; +} + +static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec, struct pipe_h265_picture_desc *pic) +{ + unsigned block_size, log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; + unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; + unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); + + unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); + unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); + unsigned coeff_10bit = (pic->pps->sps->bit_depth_luma_minus8 || + pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1; + + unsigned max_references = dec->base.max_references + 1; + + if (dec->base.width * dec->base.height >= 4096*2000) + max_references = MAX2(max_references, 8); + else + max_references = MAX2(max_references, 17); + + block_size = (1 << (pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3)); + log2_ctb_size = block_size + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + + width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; + + num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4); + context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256); + max_mb_address = (unsigned) ceil(height * 8 / 2048.0); + + cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb; + db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024); + + return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size; +} + static void rvcn_dec_message_create(struct radeon_decoder *dec) { rvcn_dec_message_header_t *header = dec->msg; @@ -294,6 +500,25 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec, index->message_id = RDECODE_MESSAGE_AVC; break; } + case PIPE_VIDEO_FORMAT_HEVC: { + rvcn_dec_message_hevc_t hevc = + get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture); + + memcpy(codec, (void*)&hevc, sizeof(rvcn_dec_message_hevc_t)); + index->message_id = RDECODE_MESSAGE_HEVC; + if (dec->ctx.res == NULL) { + unsigned ctx_size; + if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) + ctx_size = calc_ctx_size_h265_main10(dec, + (struct pipe_h265_picture_desc*)picture); + else + ctx_size = calc_ctx_size_h265_main(dec); + if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) + RVID_ERR("Can't allocated context buffer.\n"); + rvid_clear_buffer(dec->base.context, &dec->ctx); + } + break; + } default: assert(0); return NULL; -- cgit v1.2.3