diff options
author | Pengfei Qu <Pengfei.Qu@intel.com> | 2016-12-28 13:39:53 +0800 |
---|---|---|
committer | Sean V Kelley <seanvk@posteo.de> | 2017-01-10 15:22:08 -0800 |
commit | cea8c261210b7a64cab16bc01c33744fa4862a83 (patch) | |
tree | 569b48a4d6d2c1a6ba8c6b609acd626a50ea3086 | |
parent | 95c89e640cddad854040194c72027b48022e83d6 (diff) |
ENC: ME kernel for AVC encoder
Signed-off-by: Pengfei Qu <Pengfei.Qu@intel.com>
Reviewed-by: Sean V Kelley <seanvk@posteo.de>
-rwxr-xr-x | src/gen9_avc_encoder.c | 441 |
1 files changed, 441 insertions, 0 deletions
diff --git a/src/gen9_avc_encoder.c b/src/gen9_avc_encoder.c index af581fc..d64694c 100755 --- a/src/gen9_avc_encoder.c +++ b/src/gen9_avc_encoder.c @@ -3771,3 +3771,444 @@ gen9_avc_kernel_mbenc(VADriverContextP ctx, &media_object_walker_param); return VA_STATUS_SUCCESS; } + +/* +me kernle related function +*/ +static void +gen9_avc_set_curbe_me(VADriverContextP ctx, + struct encode_state *encode_state, + struct i965_gpe_context *gpe_context, + struct intel_encoder_context *encoder_context, + void * param) +{ + gen9_avc_me_curbe_data *curbe_cmd; + struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state; + struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state; + + VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0]; + + struct me_param * curbe_param = (struct me_param *)param ; + unsigned char use_mv_from_prev_step = 0; + unsigned char write_distortions = 0; + unsigned char qp_prime_y = 0; + unsigned char me_method = gen9_avc_p_me_method[generic_state->preset]; + unsigned char seach_table_idx = 0; + unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0; + unsigned int downscaled_width_in_mb, downscaled_height_in_mb; + unsigned int scale_factor = 0; + + qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta; + switch(curbe_param->hme_type) + { + case INTEL_ENC_HME_4x : + { + use_mv_from_prev_step = (generic_state->b16xme_enabled)? 1:0; + write_distortions = 1; + mv_shift_factor = 2; + scale_factor = 4; + prev_mv_read_pos_factor = 0; + break; + } + case INTEL_ENC_HME_16x : + { + use_mv_from_prev_step = (generic_state->b32xme_enabled)? 1:0; + write_distortions = 0; + mv_shift_factor = 2; + scale_factor = 16; + prev_mv_read_pos_factor = 1; + break; + } + case INTEL_ENC_HME_32x : + { + use_mv_from_prev_step = 0; + write_distortions = 0; + mv_shift_factor = 1; + scale_factor = 32; + prev_mv_read_pos_factor = 0; + break; + } + default: + assert(0); + + } + curbe_cmd = i965_gpe_context_map_curbe(gpe_context); + + if (!curbe_cmd) + return; + + downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel/scale_factor,16)/16; + downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel/scale_factor,16)/16; + + memcpy(curbe_cmd,gen9_avc_me_curbe_init_data,sizeof(gen9_avc_me_curbe_data)); + + curbe_cmd->dw3.sub_pel_mode = 3; + if(avc_state->field_scaling_output_interleaved) + { + /*frame set to zero,field specified*/ + curbe_cmd->dw3.src_access = 0; + curbe_cmd->dw3.ref_access = 0; + curbe_cmd->dw7.src_field_polarity = 0; + } + curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1; + curbe_cmd->dw4.picture_width = downscaled_width_in_mb; + curbe_cmd->dw5.qp_prime_y = qp_prime_y; + + curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step; + curbe_cmd->dw6.write_distortions = write_distortions; + curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset]; + curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only + + if(generic_state->frame_type == SLICE_TYPE_B) + { + curbe_cmd->dw1.bi_weight = 32; + curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1; + me_method = gen9_avc_b_me_method[generic_state->preset]; + seach_table_idx = 1; + } + + if(generic_state->frame_type == SLICE_TYPE_P || + generic_state->frame_type == SLICE_TYPE_B ) + curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1; + + curbe_cmd->dw13.ref_streamin_cost = 5; + curbe_cmd->dw13.roi_enable = 0; + + curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor; + curbe_cmd->dw15.mv_shift_factor = mv_shift_factor; + + memcpy(&curbe_cmd->dw16,table_enc_search_path[seach_table_idx][me_method],14*sizeof(int)); + + curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX; + curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x)? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX:GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ; + curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX; + curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX; + curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX; + curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX; + curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX; + + i965_gpe_context_unmap_curbe(gpe_context); + return; +} + +static void +gen9_avc_send_surface_me(VADriverContextP ctx, + struct encode_state *encode_state, + struct i965_gpe_context *gpe_context, + struct intel_encoder_context *encoder_context, + void * param) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + + struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state; + struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx; + struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state; + + struct object_surface *obj_surface, *input_surface; + struct gen9_surface_avc *avc_priv_surface; + struct i965_gpe_resource *gpe_resource; + struct me_param * curbe_param = (struct me_param *)param ; + + VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0]; + VASurfaceID surface_id; + int i = 0; + + /* all scaled input surface stored in reconstructed_object*/ + obj_surface = encode_state->reconstructed_object; + if (!obj_surface || !obj_surface->private_data) + return; + avc_priv_surface = obj_surface->private_data; + + + switch(curbe_param->hme_type) + { + case INTEL_ENC_HME_4x : + { + /*memv output 4x*/ + gpe_resource = &avc_ctx->s4x_memv_data_buffer; + gen9_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_ME_MV_DATA_SURFACE_INDEX); + + /*memv input 16x*/ + if(generic_state->b16xme_enabled) + { + gpe_resource = &avc_ctx->s16x_memv_data_buffer; + gen9_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX); + } + /* brc distortion output*/ + gpe_resource = &avc_ctx->res_brc_dist_data_surface; + gen9_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_ME_BRC_DISTORTION_INDEX); + /* memv distortion output*/ + gpe_resource = &avc_ctx->s4x_memv_distortion_buffer; + gen9_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_ME_DISTORTION_SURFACE_INDEX); + /*input current down scaled YUV surface*/ + obj_surface = encode_state->reconstructed_object; + avc_priv_surface = obj_surface->private_data; + input_surface = avc_priv_surface->scaled_4x_surface_obj; + gen9_add_adv_gpe_surface(ctx, gpe_context, + input_surface, + GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX); + /*input ref scaled YUV surface*/ + for(i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) + { + surface_id = slice_param->RefPicList0[i].picture_id; + obj_surface = SURFACE(surface_id); + if (!obj_surface || !obj_surface->private_data) + break; + avc_priv_surface = obj_surface->private_data; + + input_surface = avc_priv_surface->scaled_4x_surface_obj; + + gen9_add_adv_gpe_surface(ctx, gpe_context, + input_surface, + GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX+i*2 + 1); + } + + obj_surface = encode_state->reconstructed_object; + avc_priv_surface = obj_surface->private_data; + input_surface = avc_priv_surface->scaled_4x_surface_obj; + + gen9_add_adv_gpe_surface(ctx, gpe_context, + input_surface, + GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX); + + for(i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) + { + surface_id = slice_param->RefPicList1[i].picture_id; + obj_surface = SURFACE(surface_id); + if (!obj_surface || !obj_surface->private_data) + break; + avc_priv_surface = obj_surface->private_data; + + input_surface = avc_priv_surface->scaled_4x_surface_obj; + + gen9_add_adv_gpe_surface(ctx, gpe_context, + input_surface, + GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX+i*2 + 1); + } + break; + + } + case INTEL_ENC_HME_16x : + { + gpe_resource = &avc_ctx->s16x_memv_data_buffer; + gen9_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_ME_MV_DATA_SURFACE_INDEX); + + if(generic_state->b32xme_enabled) + { + gpe_resource = &avc_ctx->s32x_memv_data_buffer; + gen9_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX); + } + + obj_surface = encode_state->reconstructed_object; + avc_priv_surface = obj_surface->private_data; + input_surface = avc_priv_surface->scaled_16x_surface_obj; + gen9_add_adv_gpe_surface(ctx, gpe_context, + input_surface, + GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX); + + for(i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) + { + surface_id = slice_param->RefPicList0[i].picture_id; + obj_surface = SURFACE(surface_id); + if (!obj_surface || !obj_surface->private_data) + break; + avc_priv_surface = obj_surface->private_data; + + input_surface = avc_priv_surface->scaled_16x_surface_obj; + + gen9_add_adv_gpe_surface(ctx, gpe_context, + input_surface, + GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX+i*2 + 1); + } + + obj_surface = encode_state->reconstructed_object; + avc_priv_surface = obj_surface->private_data; + input_surface = avc_priv_surface->scaled_16x_surface_obj; + + gen9_add_adv_gpe_surface(ctx, gpe_context, + input_surface, + GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX); + + for(i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) + { + surface_id = slice_param->RefPicList1[i].picture_id; + obj_surface = SURFACE(surface_id); + if (!obj_surface || !obj_surface->private_data) + break; + avc_priv_surface = obj_surface->private_data; + + input_surface = avc_priv_surface->scaled_16x_surface_obj; + + gen9_add_adv_gpe_surface(ctx, gpe_context, + input_surface, + GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX+i*2 + 1); + } + break; + } + case INTEL_ENC_HME_32x : + { + gpe_resource = &avc_ctx->s32x_memv_data_buffer; + gen9_add_buffer_2d_gpe_surface(ctx, gpe_context, + gpe_resource, + 1, + I965_SURFACEFORMAT_R8_UNORM, + GEN9_AVC_ME_MV_DATA_SURFACE_INDEX); + + obj_surface = encode_state->reconstructed_object; + avc_priv_surface = obj_surface->private_data; + input_surface = avc_priv_surface->scaled_32x_surface_obj; + gen9_add_adv_gpe_surface(ctx, gpe_context, + input_surface, + GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX); + + for(i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) + { + surface_id = slice_param->RefPicList0[i].picture_id; + obj_surface = SURFACE(surface_id); + if (!obj_surface || !obj_surface->private_data) + break; + avc_priv_surface = obj_surface->private_data; + + input_surface = avc_priv_surface->scaled_32x_surface_obj; + + gen9_add_adv_gpe_surface(ctx, gpe_context, + input_surface, + GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX+i*2 + 1); + } + + obj_surface = encode_state->reconstructed_object; + avc_priv_surface = obj_surface->private_data; + input_surface = avc_priv_surface->scaled_32x_surface_obj; + + gen9_add_adv_gpe_surface(ctx, gpe_context, + input_surface, + GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX); + + for(i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) + { + surface_id = slice_param->RefPicList1[i].picture_id; + obj_surface = SURFACE(surface_id); + if (!obj_surface || !obj_surface->private_data) + break; + avc_priv_surface = obj_surface->private_data; + + input_surface = avc_priv_surface->scaled_32x_surface_obj; + + gen9_add_adv_gpe_surface(ctx, gpe_context, + input_surface, + GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX+i*2 + 1); + } + break; + } + default: + assert(0); + + } +} + +static VAStatus +gen9_avc_kernel_me(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int hme_type) +{ + struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context; + struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx; + struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx; + struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state; + + struct i965_gpe_context *gpe_context; + struct gpe_media_object_walker_parameter media_object_walker_param; + struct gpe_encoder_kernel_walker_parameter kernel_walker_param; + unsigned int downscaled_width_in_mb, downscaled_height_in_mb; + int media_function = 0; + int kernel_idx = 0; + struct me_param param ; + unsigned int scale_factor = 0; + + switch(hme_type) + { + case INTEL_ENC_HME_4x : + { + media_function = INTEL_MEDIA_STATE_4X_ME; + scale_factor = 4; + break; + } + case INTEL_ENC_HME_16x : + { + media_function = INTEL_MEDIA_STATE_16X_ME; + scale_factor = 16; + break; + } + case INTEL_ENC_HME_32x : + { + media_function = INTEL_MEDIA_STATE_32X_ME; + scale_factor = 32; + break; + } + default: + assert(0); + + } + + downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel/scale_factor,16)/16; + downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel/scale_factor,16)/16; + + /* I frame should not come here.*/ + kernel_idx = (generic_state->frame_type == SLICE_TYPE_P)? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX; + gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]); + + gen8_gpe_context_init(ctx, gpe_context); + gen9_gpe_reset_binding_table(ctx, gpe_context); + + /*set curbe*/ + memset(¶m,0,sizeof(param)); + param.hme_type = hme_type; + generic_ctx->pfn_set_curbe_me(ctx,encode_state,gpe_context,encoder_context,¶m); + + /*send surface*/ + generic_ctx->pfn_send_me_surface(ctx,encode_state,gpe_context,encoder_context,¶m); + + gen8_gpe_setup_interface_data(ctx, gpe_context); + + memset(&kernel_walker_param, 0, sizeof(kernel_walker_param)); + /* the scaling is based on 8x8 blk level */ + kernel_walker_param.resolution_x = downscaled_width_in_mb ; + kernel_walker_param.resolution_y = downscaled_height_in_mb ; + kernel_walker_param.no_dependency = 1; + + i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param); + + gen9_avc_run_kernel_media_object_walker(ctx, encoder_context, + gpe_context, + media_function, + &media_object_walker_param); + + return VA_STATUS_SUCCESS; +} |