summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGwenole Beauchesne <gwenole.beauchesne@intel.com>2014-05-09 18:52:00 +0200
committerGwenole Beauchesne <gwenole.beauchesne@intel.com>2014-05-28 15:53:13 +0200
commitd8625d8902666f45e2232fb2e377ae0ee518ca4b (patch)
tree2c76af05e86408ceb6c1c827c9795b3a5e2ae008
parent41bc8c6ec70af3ecd8e01a4e98bddf21da11c61a (diff)
decoder: h264: optimize support for grayscale surfaces.{merged}/15.grayscale
Optimize support for grayscale surfaces in two respects: (i) space, by only allocating the luminance component; (ii) speed, by avoiding initialization of the (now nonexistent) chrominance planes. Keep backward compatibility with older codec layers that only supported YUV 4:2:0 and did not properly support grayscale formats. v2: fix check for extra H.264 chroma formats [Haihao] Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne@intel.com>
-rwxr-xr-xsrc/gen6_mfd.c8
-rw-r--r--src/gen75_mfd.c6
-rwxr-xr-xsrc/gen7_mfd.c6
-rw-r--r--src/gen8_mfd.c6
-rw-r--r--src/i965_decoder_utils.c23
-rw-r--r--src/i965_device_info.c9
-rwxr-xr-xsrc/i965_drv_video.c13
-rw-r--r--src/i965_drv_video.h9
8 files changed, 71 insertions, 9 deletions
diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
index 2092f69..f925d98 100755
--- a/src/gen6_mfd.c
+++ b/src/gen6_mfd.c
@@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
{
struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
struct object_surface *obj_surface = decode_state->render_object;
-
+ unsigned int surface_format;
+
+ surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
OUT_BCS_BATCH(batch, 0);
@@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
((obj_surface->orig_height - 1) << 19) |
((obj_surface->orig_width - 1) << 6));
OUT_BCS_BATCH(batch,
- (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (surface_format << 28) | /* 420 planar YUV surface */
(1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
(0 << 22) | /* surface object control state, FIXME??? */
((obj_surface->width - 1) << 3) | /* pitch */
diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
index 5b023cf..895b194 100644
--- a/src/gen75_mfd.c
+++ b/src/gen75_mfd.c
@@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
struct object_surface *obj_surface = decode_state->render_object;
unsigned int y_cb_offset;
unsigned int y_cr_offset;
+ unsigned int surface_format;
assert(obj_surface);
y_cb_offset = obj_surface->y_cb_offset;
y_cr_offset = obj_surface->y_cr_offset;
+ surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
OUT_BCS_BATCH(batch, 0);
@@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
((obj_surface->orig_height - 1) << 18) |
((obj_surface->orig_width - 1) << 4));
OUT_BCS_BATCH(batch,
- (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (surface_format << 28) | /* 420 planar YUV surface */
((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
(0 << 22) | /* surface object control state, ignored */
((obj_surface->width - 1) << 3) | /* pitch */
diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
index 06eb743..f9114e7 100755
--- a/src/gen7_mfd.c
+++ b/src/gen7_mfd.c
@@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx,
struct object_surface *obj_surface = decode_state->render_object;
unsigned int y_cb_offset;
unsigned int y_cr_offset;
+ unsigned int surface_format;
assert(obj_surface);
y_cb_offset = obj_surface->y_cb_offset;
y_cr_offset = obj_surface->y_cr_offset;
+ surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
OUT_BCS_BATCH(batch, 0);
@@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx,
((obj_surface->orig_height - 1) << 18) |
((obj_surface->orig_width - 1) << 4));
OUT_BCS_BATCH(batch,
- (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (surface_format << 28) | /* 420 planar YUV surface */
((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
(0 << 22) | /* surface object control state, ignored */
((obj_surface->width - 1) << 3) | /* pitch */
diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c
index e3e71fb..10495d8 100644
--- a/src/gen8_mfd.c
+++ b/src/gen8_mfd.c
@@ -145,12 +145,16 @@ gen8_mfd_surface_state(VADriverContextP ctx,
struct object_surface *obj_surface = decode_state->render_object;
unsigned int y_cb_offset;
unsigned int y_cr_offset;
+ unsigned int surface_format;
assert(obj_surface);
y_cb_offset = obj_surface->y_cb_offset;
y_cr_offset = obj_surface->y_cr_offset;
+ surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
OUT_BCS_BATCH(batch, 0);
@@ -158,7 +162,7 @@ gen8_mfd_surface_state(VADriverContextP ctx,
((obj_surface->orig_height - 1) << 18) |
((obj_surface->orig_width - 1) << 4));
OUT_BCS_BATCH(batch,
- (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (surface_format << 28) | /* 420 planar YUV surface */
((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
(0 << 22) | /* surface object control state, ignored */
((obj_surface->width - 1) << 3) | /* pitch */
diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
index 18704fe..9a5092e 100644
--- a/src/i965_decoder_utils.c
+++ b/src/i965_decoder_utils.c
@@ -185,25 +185,40 @@ avc_ensure_surface_bo(
)
{
VAStatus va_status;
- uint32_t hw_fourcc, fourcc, subsample;
+ uint32_t hw_fourcc, fourcc, subsample, chroma_format;
/* Validate chroma format */
switch (pic_param->seq_fields.bits.chroma_format_idc) {
case 0: // Grayscale
fourcc = VA_FOURCC_Y800;
subsample = SUBSAMPLE_YUV400;
+ chroma_format = VA_RT_FORMAT_YUV400;
break;
case 1: // YUV 4:2:0
fourcc = VA_FOURCC_NV12;
subsample = SUBSAMPLE_YUV420;
+ chroma_format = VA_RT_FORMAT_YUV420;
break;
default:
return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
}
- /* XXX: always allocate NV12 (YUV 4:2:0) surfaces for now */
- hw_fourcc = VA_FOURCC_NV12;
- subsample = SUBSAMPLE_YUV420;
+ /* Determine the HW surface format, bound to VA config needs */
+ if ((decode_state->base.chroma_formats & chroma_format) == chroma_format)
+ hw_fourcc = fourcc;
+ else {
+ hw_fourcc = 0;
+ switch (fourcc) {
+ case VA_FOURCC_Y800: // Implement with an NV12 surface
+ if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) {
+ hw_fourcc = VA_FOURCC_NV12;
+ subsample = SUBSAMPLE_YUV420;
+ }
+ break;
+ }
+ }
+ if (!hw_fourcc)
+ return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
/* (Re-)allocate the underlying surface buffer store, if necessary */
if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) {
diff --git a/src/i965_device_info.c b/src/i965_device_info.c
index 1d5d6aa..4fad7a4 100644
--- a/src/i965_device_info.c
+++ b/src/i965_device_info.c
@@ -27,6 +27,10 @@
#include <stdlib.h>
#include "i965_drv_video.h"
+/* Extra set of chroma formats supported for H.264 decoding (beyond YUV 4:2:0) */
+#define EXTRA_H264_DEC_CHROMA_FORMATS \
+ (VA_RT_FORMAT_YUV400)
+
/* Extra set of chroma formats supported for JPEG decoding (beyond YUV 4:2:0) */
#define EXTRA_JPEG_DEC_CHROMA_FORMATS \
(VA_RT_FORMAT_YUV400 | VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | \
@@ -90,6 +94,8 @@ static const struct hw_codec_info snb_hw_codec_info = {
.min_linear_wpitch = 16,
.min_linear_hpitch = 16,
+ .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
+
.has_mpeg2_decoding = 1,
.has_h264_decoding = 1,
.has_h264_encoding = 1,
@@ -120,6 +126,7 @@ static const struct hw_codec_info ivb_hw_codec_info = {
.min_linear_wpitch = 64,
.min_linear_hpitch = 16,
+ .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
.jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
.has_mpeg2_decoding = 1,
@@ -156,6 +163,7 @@ static const struct hw_codec_info hsw_hw_codec_info = {
.min_linear_wpitch = 64,
.min_linear_hpitch = 16,
+ .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
.jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
.has_mpeg2_decoding = 1,
@@ -196,6 +204,7 @@ static const struct hw_codec_info bdw_hw_codec_info = {
.min_linear_wpitch = 64,
.min_linear_hpitch = 16,
+ .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
.jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
.has_mpeg2_decoding = 1,
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 4690b62..c7da398 100755
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -446,6 +446,13 @@ i965_get_default_chroma_formats(VADriverContextP ctx, VAProfile profile,
uint32_t chroma_formats = VA_RT_FORMAT_YUV420;
switch (profile) {
+ case VAProfileH264ConstrainedBaseline:
+ case VAProfileH264Main:
+ case VAProfileH264High:
+ if (HAS_H264_DECODING(i965) && entrypoint == VAEntrypointVLD)
+ chroma_formats |= i965->codec_info->h264_dec_chroma_formats;
+ break;
+
case VAProfileJPEGBaseline:
if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD)
chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats;
@@ -1532,6 +1539,7 @@ i965_CreateContext(VADriverContextP ctx,
struct i965_render_state *render_state = &i965->render_state;
struct object_config *obj_config = CONFIG(config_id);
struct object_context *obj_context = NULL;
+ VAConfigAttrib *attrib;
VAStatus vaStatus = VA_STATUS_SUCCESS;
int contextID;
int i;
@@ -1625,6 +1633,11 @@ i965_CreateContext(VADriverContextP ctx,
}
}
+ attrib = i965_lookup_config_attribute(obj_config, VAConfigAttribRTFormat);
+ if (!attrib)
+ return VA_STATUS_ERROR_INVALID_CONFIG;
+ obj_context->codec_state.base.chroma_formats = attrib->value;
+
/* Error recovery */
if (VA_STATUS_SUCCESS != vaStatus) {
i965_destroy_context(&i965->context_heap, (struct object_base *)obj_context);
diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
index a09e071..e8bbf87 100644
--- a/src/i965_drv_video.h
+++ b/src/i965_drv_video.h
@@ -101,8 +101,13 @@ struct object_config
#define NUM_SLICES 10
+struct codec_state_base {
+ uint32_t chroma_formats;
+};
+
struct decode_state
{
+ struct codec_state_base base;
struct buffer_store *pic_param;
struct buffer_store **slice_params;
struct buffer_store *iq_matrix;
@@ -122,6 +127,7 @@ struct decode_state
struct encode_state
{
+ struct codec_state_base base;
struct buffer_store *seq_param;
struct buffer_store *pic_param;
struct buffer_store *pic_control;
@@ -152,6 +158,7 @@ struct encode_state
struct proc_state
{
+ struct codec_state_base base;
struct buffer_store *pipeline_param;
VASurfaceID current_render_target;
@@ -163,6 +170,7 @@ struct proc_state
union codec_state
{
+ struct codec_state_base base;
struct decode_state decode;
struct encode_state encode;
struct proc_state proc;
@@ -294,6 +302,7 @@ struct hw_codec_info
int min_linear_wpitch;
int min_linear_hpitch;
+ unsigned int h264_dec_chroma_formats;
unsigned int jpeg_dec_chroma_formats;
unsigned int has_mpeg2_decoding:1;