23 files changed, 7901 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
new file mode 100644
index 00000000000..78b8d0627ce
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -0,0 +1,734 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+
+#include <pipe/p_context.h>
+
+#include <util/u_memory.h>
+#include <util/u_draw.h>
+#include <util/u_surface.h>
+
+#include <tgsi/tgsi_ureg.h>
+
+#include "vl_csc.h"
+#include "vl_types.h"
+#include "vl_compositor.h"
+
+typedef float csc_matrix[16];
+
+/*
+ * Build the pass-through vertex shader: input 0 is copied to POSITION and
+ * input 1 to GENERIC[1]. Returns NULL if the ureg program can't be created.
+ */
+static void *
+create_vert_shader(struct vl_compositor *c)
+{
+   struct ureg_program *shader;
+   struct ureg_src vpos, vtex;
+   struct ureg_dst o_vpos, o_vtex;
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL; /* pointer return type: NULL rather than the bool 'false' */
+
+   vpos = ureg_DECL_vs_input(shader, 0);
+   vtex = ureg_DECL_vs_input(shader, 1);
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
+   o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 1);
+
+   /* o_vpos = vpos, o_vtex = vtex */
+   ureg_MOV(shader, o_vpos, vpos);
+   ureg_MOV(shader, o_vtex, vtex);
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, c->pipe);
+}
+
+/*
+ * Build the fragment shader for video buffer layers: component i of the texel
+ * is written from sampler i (i = 0..2) and the result is multiplied by the
+ * constants csc[0..2]. Returns NULL if the ureg program can't be created.
+ */
+static void *
+create_frag_shader_video_buffer(struct vl_compositor *c)
+{
+   struct ureg_program *shader;
+   struct ureg_src tc;
+   struct ureg_src csc[3];
+   struct ureg_src sampler[3];
+   struct ureg_dst texel;
+   struct ureg_dst fragment;
+   unsigned i;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL; /* pointer return type: NULL rather than the bool 'false' */
+
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
+   for (i = 0; i < 3; ++i) {
+      csc[i] = ureg_DECL_constant(shader, i);
+      sampler[i] = ureg_DECL_sampler(shader, i);
+   }
+   texel = ureg_DECL_temporary(shader);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /* texel.xyz = tex(tc, sampler[i]), texel.w = 1 */
+   for (i = 0; i < 3; ++i)
+      ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, tc, sampler[i]);
+   ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
+
+   /* fragment.xyz = csc * texel, fragment.w = 1 */
+   for (i = 0; i < 3; ++i)
+      ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel));
+   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
+   ureg_release_temporary(shader, texel);
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, c->pipe);
+}
+
+/*
+ * Build the fragment shader for palette layers: sampler 0 is sampled at tc,
+ * the result is used as coordinate into sampler 1, and that colour is
+ * multiplied by csc[0..2]. Returns NULL if the ureg program can't be created.
+ */
+static void *
+create_frag_shader_palette(struct vl_compositor *c)
+{
+   struct ureg_program *shader;
+   struct ureg_src csc[3];
+   struct ureg_src tc;
+   struct ureg_src sampler;
+   struct ureg_src palette;
+   struct ureg_dst texel;
+   struct ureg_dst fragment;
+   unsigned i;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL; /* pointer return type: NULL rather than the bool 'false' */
+
+   for (i = 0; i < 3; ++i)
+      csc[i] = ureg_DECL_constant(shader, i);
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
+   sampler = ureg_DECL_sampler(shader, 0);
+   palette = ureg_DECL_sampler(shader, 1);
+   texel = ureg_DECL_temporary(shader);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /* texel = tex(tc, sampler); x is scaled by 15/16, w becomes fragment.a */
+   ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
+   ureg_MUL(shader, ureg_writemask(texel, TGSI_WRITEMASK_X), ureg_src(texel), ureg_imm1f(shader, 15.0f / 16.0f));
+   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(texel));
+
+   /* fragment.xyz = csc * tex(texel, palette) */
+   ureg_TEX(shader, texel, TGSI_TEXTURE_1D, ureg_src(texel), palette);
+   for (i = 0; i < 3; ++i)
+      ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel));
+
+   ureg_release_temporary(shader, texel);
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, c->pipe);
+}
+
+/*
+ * RGBA layer fragment shader: fragment = tex(tc, sampler 0); NULL on ureg failure.
+ */
+static void *
+create_frag_shader_rgba(struct vl_compositor *c)
+{
+   struct ureg_program *shader;
+   struct ureg_src tc;
+   struct ureg_src sampler;
+   struct ureg_dst fragment;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL; /* pointer return type: NULL rather than the bool 'false' */
+
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
+   sampler = ureg_DECL_sampler(shader, 0);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   ureg_TEX(shader, fragment, TGSI_TEXTURE_2D, tc, sampler);
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, c->pipe);
+}
+
+/*
+ * Create all four shaders. On failure, delete the ones already created so
+ * they don't leak (vl_compositor_init() skips cleanup_shaders() then).
+ */
+static bool
+init_shaders(struct vl_compositor *c)
+{
+   const char *failed;
+
+   assert(c);
+   c->vs = c->fs_video_buffer = c->fs_palette = c->fs_rgba = NULL;
+   if (!(c->vs = create_vert_shader(c)))
+      failed = "vertex shader";
+   else if (!(c->fs_video_buffer = create_frag_shader_video_buffer(c)))
+      failed = "YCbCr-to-RGB fragment shader";
+   else if (!(c->fs_palette = create_frag_shader_palette(c)))
+      failed = "Palette-to-RGB fragment shader";
+   else if (!(c->fs_rgba = create_frag_shader_rgba(c)))
+      failed = "RGB-to-RGB fragment shader";
+   else
+      return true;
+
+   debug_printf("Unable to create %s.\n", failed);
+   if (c->fs_palette) /* a NULL handle means that stage was never reached */
+      c->pipe->delete_fs_state(c->pipe, c->fs_palette);
+   if (c->fs_video_buffer)
+      c->pipe->delete_fs_state(c->pipe, c->fs_video_buffer);
+   if (c->vs)
+      c->pipe->delete_vs_state(c->pipe, c->vs);
+   return false;
+}
+
+/* Delete the vertex shader and the three fragment shaders through c->pipe. */
+static void cleanup_shaders(struct vl_compositor *c)
+{
+   assert(c);
+   c->pipe->delete_vs_state(c->pipe, c->vs);
+   c->pipe->delete_fs_state(c->pipe, c->fs_video_buffer);
+   c->pipe->delete_fs_state(c->pipe, c->fs_palette);
+   c->pipe->delete_fs_state(c->pipe, c->fs_rgba);
+}
+
+/*
+ * Create the fixed state objects (linear and nearest sampler, blend,
+ * rasterizer) and preset fb_state/viewport. Creation results are not checked;
+ * always returns true.
+ */
+static bool
+init_pipe_state(struct vl_compositor *c)
+{
+   struct pipe_rasterizer_state rast;
+   struct pipe_sampler_state sampler;
+   struct pipe_blend_state blend;
+
+   assert(c);
+   c->fb_state.nr_cbufs = 1;
+   c->fb_state.zsbuf = NULL;
+   /* viewport.scale[0] and [1] are set per call in vl_compositor_render() */
+   c->viewport.scale[2] = 1;
+   c->viewport.scale[3] = 1;
+   c->viewport.translate[0] = 0;
+   c->viewport.translate[1] = 0;
+   c->viewport.translate[2] = 0;
+   c->viewport.translate[3] = 0;
+
+   memset(&sampler, 0, sizeof(sampler));
+   sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
+   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+   sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
+   sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+   sampler.compare_func = PIPE_FUNC_ALWAYS;
+   sampler.normalized_coords = 1;
+   c->sampler_linear = c->pipe->create_sampler_state(c->pipe, &sampler);
+   sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; /* same template, nearest filtering */
+   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+   c->sampler_nearest = c->pipe->create_sampler_state(c->pipe, &sampler);
+
+   memset(&blend, 0, sizeof blend);
+   blend.independent_blend_enable = 0;
+   blend.rt[0].blend_enable = 1;
+   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA; /* rgb = src * a + dst * (1 - a) */
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
+   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; /* alpha = src + dst */
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.logicop_enable = 0;
+   blend.logicop_func = PIPE_LOGICOP_CLEAR;
+   blend.rt[0].colormask = PIPE_MASK_RGBA;
+   blend.dither = 0;
+   c->blend = c->pipe->create_blend_state(c->pipe, &blend);
+
+   memset(&rast, 0, sizeof rast);
+   rast.flatshade = 1;
+   rast.front_ccw = 1;
+   rast.cull_face = PIPE_FACE_NONE;
+   rast.fill_back = PIPE_POLYGON_MODE_FILL;
+   rast.fill_front = PIPE_POLYGON_MODE_FILL;
+   rast.scissor = 1; /* the scissor rectangle itself is set in vl_compositor_render() */
+   rast.line_width = 1;
+   rast.point_size_per_vertex = 1;
+   rast.offset_units = 1;
+   rast.offset_scale = 1;
+   rast.gl_rasterization_rules = 1;
+   c->rast = c->pipe->create_rasterizer_state(c->pipe, &rast);
+   return true;
+}
+
+/* Delete the sampler, blend and rasterizer state objects through c->pipe. */
+static void cleanup_pipe_state(struct vl_compositor *c)
+{
+   assert(c);
+   c->pipe->delete_sampler_state(c->pipe, c->sampler_linear);
+   c->pipe->delete_sampler_state(c->pipe, c->sampler_nearest);
+   c->pipe->delete_blend_state(c->pipe, c->blend);
+   c->pipe->delete_rasterizer_state(c->pipe, c->rast);
+}
+
+/*
+ * Drop the current vertex buffer reference and allocate a fresh buffer sized
+ * for four vertex4f per layer. Returns false if the allocation failed.
+ */
+static bool
+create_vertex_buffer(struct vl_compositor *c)
+{
+   const unsigned size = sizeof(struct vertex4f) * VL_COMPOSITOR_MAX_LAYERS * 4;
+
+   assert(c);
+   pipe_resource_reference(&c->vertex_buf.buffer, NULL);
+   c->vertex_buf.buffer = pipe_buffer_create(c->pipe->screen, PIPE_BIND_VERTEX_BUFFER,
+                                             PIPE_USAGE_STREAM, size);
+   return c->vertex_buf.buffer ? true : false;
+}
+
+/*
+ * Create the vertex buffer, vertex elements state and CSC constant buffer.
+ * On failure, release what was created here and return false.
+ */
+static bool
+init_buffers(struct vl_compositor *c)
+{
+   struct pipe_vertex_element vertex_elems[2];
+
+   assert(c);
+
+   c->vertex_buf.stride = sizeof(struct vertex4f);
+   c->vertex_buf.buffer_offset = 0;
+   if (!create_vertex_buffer(c))
+      return false;
+
+   vertex_elems[0].src_offset = 0;
+   vertex_elems[0].instance_divisor = 0;
+   vertex_elems[0].vertex_buffer_index = 0;
+   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[1].src_offset = sizeof(struct vertex2f);
+   vertex_elems[1].instance_divisor = 0;
+   vertex_elems[1].vertex_buffer_index = 0;
+   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   c->vertex_elems_state = c->pipe->create_vertex_elements_state(c->pipe, 2, vertex_elems);
+   if (!c->vertex_elems_state)
+      goto fail;
+
+   /* XXX: Create with IMMUTABLE/STATIC... although it does change every once in a long while... */
+   c->csc_matrix = pipe_buffer_create(c->pipe->screen, PIPE_BIND_CONSTANT_BUFFER,
+                                      PIPE_USAGE_STATIC, sizeof(csc_matrix));
+   if (c->csc_matrix)
+      return true;
+
+   c->pipe->delete_vertex_elements_state(c->pipe, c->vertex_elems_state);
+fail:
+   pipe_resource_reference(&c->vertex_buf.buffer, NULL);
+   return false;
+}
+
+/* Delete the vertex elements state and drop both buffer references. */
+static void
+cleanup_buffers(struct vl_compositor *c)
+{
+   assert(c);
+   c->pipe->delete_vertex_elements_state(c->pipe, c->vertex_elems_state);
+   pipe_resource_reference(&c->vertex_buf.buffer, NULL);
+   pipe_resource_reference(&c->csc_matrix, NULL);
+}
+
+/* Rectangle at (0,0) with the size of the texture behind the layer's first sampler view. */
+static inline struct pipe_video_rect
+default_rect(struct vl_compositor_layer *layer)
+{
+   struct pipe_resource *tex = layer->sampler_views[0]->texture;
+   return (struct pipe_video_rect) { 0, 0, tex->width0, tex->height0 };
+}
+
+/* Top-left corner of rect, each coordinate divided by the matching component of size. */
+static inline struct vertex2f
+calc_topleft(struct vertex2f size, struct pipe_video_rect rect)
+{
+   return (struct vertex2f) { rect.x / size.x, rect.y / size.y };
+}
+
+/* Bottom-right corner of rect (origin plus extent), divided component-wise by size. */
+static inline struct vertex2f
+calc_bottomright(struct vertex2f size, struct pipe_video_rect rect)
+{
+   return (struct vertex2f) { (rect.x + rect.w) / size.x, (rect.y + rect.h) / size.y };
+}
+
+/* Store the corners of src and dst in the layer, both divided by width x height. */
+static inline void
+calc_src_and_dst(struct vl_compositor_layer *layer, unsigned width, unsigned height,
+                 struct pipe_video_rect src, struct pipe_video_rect dst)
+{
+   struct vertex2f extent = { width, height };
+   layer->src.tl = calc_topleft(extent, src);
+   layer->dst.tl = calc_topleft(extent, dst);
+   layer->src.br = calc_bottomright(extent, src);
+   layer->dst.br = calc_bottomright(extent, dst);
+}
+
+/*
+ * Write the layer's quad to vb[0..3] in the order top-left, top-right,
+ * bottom-right, bottom-left. x/y of each vertex carry the dst corner and
+ * z/w the matching src corner.
+ */
+static void
+gen_rect_verts(struct vertex4f *vb, struct vl_compositor_layer *layer)
+{
+   unsigned corner;
+
+   assert(vb && layer);
+
+   for (corner = 0; corner < 4; ++corner) {
+      /* corners 1 and 2 take their x from br,
+       * corners 2 and 3 take their y from br */
+      const bool right = corner == 1 || corner == 2;
+      const bool bottom = corner >= 2;
+      struct vertex4f *v = &vb[corner];
+
+      v->x = right ? layer->dst.br.x : layer->dst.tl.x;
+      v->y = bottom ? layer->dst.br.y : layer->dst.tl.y;
+      v->z = right ? layer->src.br.x : layer->src.tl.x;
+      v->w = bottom ? layer->src.br.y : layer->src.tl.y;
+   }
+}
+
+/*
+ * Fill the vertex buffer with one quad per used layer and mark the dirty area
+ * clean when a clearing layer covers it. Returns early if mapping fails.
+ */
+static void
+gen_vertex_data(struct vl_compositor *c)
+{
+   struct vertex4f *vb;
+   struct pipe_transfer *buf_transfer;
+   unsigned i;
+
+   assert(c);
+   vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
+                        PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | PIPE_TRANSFER_DONTBLOCK,
+                        &buf_transfer);
+   if (!vb) {
+      // If buffer is still locked from last draw create a new one
+      if (!create_vertex_buffer(c))
+         return;
+      vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
+                           PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+                           &buf_transfer);
+      if (!vb)
+         return; /* never write through, or unmap, a failed mapping */
+   }
+   for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; i++) {
+      struct vl_compositor_layer *layer = &c->layers[i];
+      if (!(c->used_layers & (1 << i)))
+         continue;
+      gen_rect_verts(vb, layer);
+      vb += 4;
+      if (layer->clearing &&
+          c->dirty_tl.x >= layer->dst.tl.x && c->dirty_tl.y >= layer->dst.tl.y &&
+          c->dirty_br.x <= layer->dst.br.x && c->dirty_br.y <= layer->dst.br.y) {
+         // We clear the dirty area anyway, no need for clear_render_target
+         c->dirty_tl.x = c->dirty_tl.y = 1.0f;
+         c->dirty_br.x = c->dirty_br.y = 0.0f;
+      }
+   }
+
+   pipe_buffer_unmap(c->pipe, buf_transfer);
+}
+
+/* Draw one quad per used layer and grow the dirty area by each drawn rectangle. */
+static void
+draw_layers(struct vl_compositor *c)
+{
+   unsigned first_vertex = 0, idx;
+
+   assert(c);
+
+   for (idx = 0; idx < VL_COMPOSITOR_MAX_LAYERS; ++idx) {
+      struct vl_compositor_layer *layer = &c->layers[idx];
+      struct pipe_sampler_view **views = layer->sampler_views;
+      unsigned count;
+      if (!(c->used_layers & (1 << idx)))
+         continue;
+      count = !views[1] ? 1 : !views[2] ? 2 : 3;
+      c->pipe->bind_fs_state(c->pipe, layer->fs);
+      c->pipe->bind_fragment_sampler_states(c->pipe, count, layer->samplers);
+      c->pipe->set_fragment_sampler_views(c->pipe, count, views);
+      util_draw_arrays(c->pipe, PIPE_PRIM_QUADS, first_vertex, 4);
+      first_vertex += 4;
+      // Remember the currently drawn area as dirty for the next draw command
+      c->dirty_tl.x = MIN2(layer->dst.tl.x, c->dirty_tl.x);
+      c->dirty_tl.y = MIN2(layer->dst.tl.y, c->dirty_tl.y);
+      c->dirty_br.x = MAX2(layer->dst.br.x, c->dirty_br.x);
+      c->dirty_br.y = MAX2(layer->dst.br.y, c->dirty_br.y);
+   }
+}
+
+/* Set the dirty area to the rectangle (0,0)-(1,1). */
+void
+vl_compositor_reset_dirty_area(struct vl_compositor *c)
+{
+   assert(c);
+   c->dirty_tl.y = c->dirty_tl.x = 0.0f;
+   c->dirty_br.y = c->dirty_br.x = 1.0f;
+}
+
+/* Copy the four components of color into the compositor's clear colour. */
+void
+vl_compositor_set_clear_color(struct vl_compositor *c, float color[4])
+{
+   assert(c);
+   c->clear_color[0] = color[0];
+   c->clear_color[1] = color[1];
+   c->clear_color[2] = color[2];
+   c->clear_color[3] = color[3];
+}
+
+/* Mark every layer unused, clear its shader and drop its sampler view references. */
+void
+vl_compositor_clear_layers(struct vl_compositor *c)
+{
+   struct vl_compositor_layer *layer;
+   assert(c);
+   c->used_layers = 0;
+   for (layer = c->layers; layer != c->layers + VL_COMPOSITOR_MAX_LAYERS; ++layer) {
+      layer->fs = NULL;
+      pipe_sampler_view_reference(&layer->sampler_views[0], NULL);
+      pipe_sampler_view_reference(&layer->sampler_views[1], NULL);
+      pipe_sampler_view_reference(&layer->sampler_views[2], NULL);
+   }
+}
+
+/* Release everything the compositor holds: the layers' sampler views first,
+ * then the buffers, the shaders and the pipe state objects. */
+void
+vl_compositor_cleanup(struct vl_compositor *c)
+{
+   assert(c);
+   vl_compositor_clear_layers(c);
+   cleanup_buffers(c);
+   cleanup_shaders(c);
+   cleanup_pipe_state(c);
+}
+
+/* Upload the 16 floats of matrix into the CSC constant buffer; the update is
+ * skipped if the buffer is missing or can't be mapped. */
+void
+vl_compositor_set_csc_matrix(struct vl_compositor *c, const float matrix[16])
+{
+   struct pipe_transfer *buf_transfer;
+   void *dst = NULL;
+
+   assert(c);
+   if (c->csc_matrix)
+      dst = pipe_buffer_map(c->pipe, c->csc_matrix,
+                            PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, &buf_transfer);
+   if (!dst)
+      return; /* never memcpy() to, or unmap, a failed mapping */
+
+   memcpy(dst, matrix, sizeof(csc_matrix));
+   pipe_buffer_unmap(c->pipe, buf_transfer);
+}
+
+/* Show a video buffer on `layer`; a NULL src_rect/dst_rect selects default_rect(). */
+void
+vl_compositor_set_buffer_layer(struct vl_compositor *c, unsigned layer,
+                               struct pipe_video_buffer *buffer,
+                               struct pipe_video_rect *src_rect,
+                               struct pipe_video_rect *dst_rect)
+{
+   struct vl_compositor_layer *target;
+   struct pipe_sampler_view **components;
+   unsigned plane;
+
+   assert(c && buffer);
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+
+   target = &c->layers[layer];
+   c->used_layers |= 1 << layer;
+   target->clearing = true;
+   target->fs = c->fs_video_buffer;
+   components = buffer->get_sampler_view_components(buffer);
+   for (plane = 0; plane < 3; ++plane) {
+      target->samplers[plane] = c->sampler_linear;
+      pipe_sampler_view_reference(&target->sampler_views[plane], components[plane]);
+   }
+
+   calc_src_and_dst(target, buffer->width, buffer->height,
+                    src_rect ? *src_rect : default_rect(target),
+                    dst_rect ? *dst_rect : default_rect(target));
+}
+
+/* Show paletted data on `layer`: indexes sampled linearly, palette sampled nearest. */
+void
+vl_compositor_set_palette_layer(struct vl_compositor *c, unsigned layer,
+                                struct pipe_sampler_view *indexes,
+                                struct pipe_sampler_view *palette,
+                                struct pipe_video_rect *src_rect,
+                                struct pipe_video_rect *dst_rect)
+{
+   struct vl_compositor_layer *target;
+
+   assert(c && indexes && palette);
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+   target = &c->layers[layer];
+   c->used_layers |= 1 << layer;
+   target->clearing = false;
+   target->fs = c->fs_palette;
+   target->samplers[0] = c->sampler_linear;
+   target->samplers[1] = c->sampler_nearest;
+   target->samplers[2] = NULL;
+   pipe_sampler_view_reference(&target->sampler_views[0], indexes);
+   pipe_sampler_view_reference(&target->sampler_views[1], palette);
+   pipe_sampler_view_reference(&target->sampler_views[2], NULL);
+   calc_src_and_dst(target, indexes->texture->width0, indexes->texture->height0,
+                    src_rect ? *src_rect : default_rect(target),
+                    dst_rect ? *dst_rect : default_rect(target));
+}
+
+/* Show an RGBA sampler view on `layer`; it counts as clearing when swizzle_a is ONE. */
+void
+vl_compositor_set_rgba_layer(struct vl_compositor *c, unsigned layer,
+                             struct pipe_sampler_view *rgba,
+                             struct pipe_video_rect *src_rect, struct pipe_video_rect *dst_rect)
+{
+   struct vl_compositor_layer *target;
+
+   assert(c && rgba);
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+   target = &c->layers[layer];
+   c->used_layers |= 1 << layer;
+   target->clearing = rgba->swizzle_a == PIPE_SWIZZLE_ONE;
+   target->fs = c->fs_rgba;
+   target->samplers[0] = c->sampler_linear;
+   target->samplers[1] = NULL;
+   target->samplers[2] = NULL;
+   pipe_sampler_view_reference(&target->sampler_views[0], rgba);
+   pipe_sampler_view_reference(&target->sampler_views[1], NULL);
+   pipe_sampler_view_reference(&target->sampler_views[2], NULL);
+   calc_src_and_dst(target, rgba->texture->width0, rgba->texture->height0,
+                    src_rect ? *src_rect : default_rect(target),
+                    dst_rect ? *dst_rect : default_rect(target));
+}
+
+/*
+ * Render all used layers to dst_surface, scissored to dst_area (the whole
+ * surface when NULL), then flush the pipe passing `fence`. The surface is
+ * cleared first while a dirty area is pending. picture_type is unused here.
+ */
+void
+vl_compositor_render(struct vl_compositor *c,
+                     enum pipe_mpeg12_picture_type picture_type,
+                     struct pipe_surface           *dst_surface,
+                     struct pipe_video_rect        *dst_area,
+                     struct pipe_fence_handle      **fence)
+{
+   struct pipe_scissor_state scissor;
+
+   assert(c);
+   assert(dst_surface);
+
+   c->fb_state.width = dst_surface->width;
+   c->fb_state.height = dst_surface->height;
+   c->fb_state.cbufs[0] = dst_surface;
+   c->viewport.scale[0] = dst_surface->width;
+   c->viewport.scale[1] = dst_surface->height;
+
+   if (dst_area) {
+      scissor.minx = dst_area->x;
+      scissor.miny = dst_area->y;
+      scissor.maxx = dst_area->x + dst_area->w;
+      scissor.maxy = dst_area->y + dst_area->h;
+   } else {
+      scissor.minx = 0;
+      scissor.miny = 0;
+      scissor.maxx = dst_surface->width;
+      scissor.maxy = dst_surface->height;
+   }
+   /* runs before the dirty check: it may mark the dirty area as clean */
+   gen_vertex_data(c);
+   if (c->dirty_tl.x < c->dirty_br.x || c->dirty_tl.y < c->dirty_br.y) {
+      util_clear_render_target(c->pipe, dst_surface, c->clear_color, 0, 0, dst_surface->width, dst_surface->height);
+      c->dirty_tl.x = c->dirty_tl.y = 1.0f;
+      c->dirty_br.x = c->dirty_br.y = 0.0f;
+   }
+   c->pipe->set_scissor_state(c->pipe, &scissor);
+   c->pipe->set_framebuffer_state(c->pipe, &c->fb_state);
+   c->pipe->set_viewport_state(c->pipe, &c->viewport);
+   c->pipe->bind_vs_state(c->pipe, c->vs);
+   c->pipe->set_vertex_buffers(c->pipe, 1, &c->vertex_buf);
+   c->pipe->bind_vertex_elements_state(c->pipe, c->vertex_elems_state);
+   c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_FRAGMENT, 0, c->csc_matrix);
+   c->pipe->bind_blend_state(c->pipe, c->blend);
+   c->pipe->bind_rasterizer_state(c->pipe, c->rast);
+   draw_layers(c);
+   c->pipe->flush(c->pipe, fence);
+}
+
+/*
+ * Initialize the compositor on `pipe`: create pipe state, shaders and buffers,
+ * then start with no layers, the VL_CSC_COLOR_STANDARD_IDENTITY matrix, a zero
+ * clear colour and a reset dirty area. Returns false if a creation step fails.
+ */
+bool
+vl_compositor_init(struct vl_compositor *c, struct pipe_context *pipe)
+{
+   csc_matrix identity;
+
+   c->pipe = pipe;
+   if (!init_pipe_state(c))
+      return false;
+   if (!init_shaders(c))
+      goto fail_pipe_state;
+   if (!init_buffers(c))
+      goto fail_shaders;
+
+   vl_compositor_clear_layers(c);
+   vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, identity);
+   vl_compositor_set_csc_matrix(c, identity);
+   c->clear_color[0] = c->clear_color[1] = c->clear_color[2] = c->clear_color[3] = 0.0f;
+   vl_compositor_reset_dirty_area(c);
+   return true;
+
+fail_shaders:
+   cleanup_shaders(c);
+fail_pipe_state:
+   cleanup_pipe_state(c);
+   return false;
+}
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
new file mode 100644
index 00000000000..df662db4d91
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -0,0 +1,169 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_compositor_h
+#define vl_compositor_h
+
+#include <pipe/p_state.h>
+#include <pipe/p_video_decoder.h>
+#include <pipe/p_video_state.h>
+
+#include "vl_types.h"
+
+struct pipe_context;
+
+/**
+ * composing and displaying of image data
+ */
+
+#define VL_COMPOSITOR_MAX_LAYERS 16
+
+/* One layer: what to sample, with which shader, and from/to which rectangle */
+struct vl_compositor_layer
+{
+   bool clearing; /* tested in gen_vertex_data() against the dirty area */
+   void *fs; /* fragment shader bound while this layer is drawn */
+   void *samplers[3]; /* sampler state objects bound together with sampler_views */
+
+   struct pipe_sampler_view *sampler_views[3]; /* slots 1 and 2 may be NULL */
+   struct {
+      struct vertex2f tl, br; /* top-left and bottom-right corner */
+   } src, dst;
+};
+
+/* Compositor state: pipe objects, shaders and the layers to render */
+struct vl_compositor
+{
+   struct pipe_context *pipe; /* context all state is created on and drawn with */
+   struct pipe_framebuffer_state fb_state;
+   struct pipe_viewport_state viewport;
+   struct pipe_vertex_buffer vertex_buf; /* four vertex4f per layer */
+   struct pipe_resource *csc_matrix; /* constant buffer 0 of the fragment shaders */
+   /* state objects created by init_pipe_state() and init_buffers() */
+   void *sampler_linear;
+   void *sampler_nearest;
+   void *blend;
+   void *rast;
+   void *vertex_elems_state;
+   /* shaders created by init_shaders() */
+   void *vs;
+   void *fs_video_buffer;
+   void *fs_palette;
+   void *fs_rgba;
+
+   float clear_color[4];
+   struct vertex2f dirty_tl, dirty_br; /* vl_compositor_reset_dirty_area() sets (0,0)-(1,1) */
+
+   unsigned used_layers:VL_COMPOSITOR_MAX_LAYERS; /* bit i set: layers[i] is drawn */
+   struct vl_compositor_layer layers[VL_COMPOSITOR_MAX_LAYERS];
+};
+
+/**
+ * initialize this compositor
+ */
+bool
+vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe);
+
+/**
+ * set yuv -> rgba conversion matrix
+ */
+void
+vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float mat[16]);
+
+/**
+ * reset dirty area, so it's cleared with the clear colour
+ */
+void
+vl_compositor_reset_dirty_area(struct vl_compositor *compositor);
+
+/**
+ * set the clear color
+ */
+void
+vl_compositor_set_clear_color(struct vl_compositor *compositor, float color[4]);
+
+/**
+ * set overlay samplers
+ */
+/*@{*/
+
+/**
+ * reset all currently set layers
+ */
+void
+vl_compositor_clear_layers(struct vl_compositor *compositor);
+
+/**
+ * set a video buffer as a layer to render
+ */
+void
+vl_compositor_set_buffer_layer(struct vl_compositor *compositor,
+                               unsigned layer,
+                               struct pipe_video_buffer *buffer,
+                               struct pipe_video_rect *src_rect,
+                               struct pipe_video_rect *dst_rect);
+
+/**
+ * set a paletted sampler as a layer to render
+ */
+void
+vl_compositor_set_palette_layer(struct vl_compositor *compositor,
+                                unsigned layer,
+                                struct pipe_sampler_view *indexes,
+                                struct pipe_sampler_view *palette,
+                                struct pipe_video_rect *src_rect,
+                                struct pipe_video_rect *dst_rect);
+
+/**
+ * set a rgba sampler as a layer to render
+ */
+void
+vl_compositor_set_rgba_layer(struct vl_compositor *compositor,
+                             unsigned layer,
+                             struct pipe_sampler_view *rgba,
+                             struct pipe_video_rect *src_rect,
+                             struct pipe_video_rect *dst_rect);
+
+/*@}*/
+
+/**
+ * render all used layers to dst_surface
+ */
+void
+vl_compositor_render(struct vl_compositor          *compositor,
+                     enum pipe_mpeg12_picture_type picture_type,
+                     struct pipe_surface           *dst_surface,
+                     struct pipe_video_rect        *dst_area,
+                     struct pipe_fence_handle      **fence);
+
+/**
+ * destroy this compositor
+ */
+void
+vl_compositor_cleanup(struct vl_compositor *compositor);
+
+#endif /* vl_compositor_h */
diff --git a/src/gallium/auxiliary/vl/vl_csc.c b/src/gallium/auxiliary/vl/vl_csc.c
new file mode 100644
index 00000000000..00eefa293a4
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_csc.c
@@ -0,0 +1,217 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <util/u_math.h>
+#include <util/u_debug.h>
+
+#include "vl_csc.h"
+
+/*
+ * Color space conversion formulas
+ *
+ * To convert YCbCr to RGB,
+ *    vec4  ycbcr, rgb
+ *    mat44 csc
+ *    rgb = csc * ycbcr
+ *
+ * To calculate the color space conversion matrix csc with ProcAmp adjustments,
+ *    mat44 csc, cstd, procamp, bias
+ *    csc = cstd * (procamp * bias)
+ *
+ * Where cstd is a matrix corresponding to one of the color standards (BT.601, BT.709, etc)
+ * adjusted for the kind of YCbCr -> RGB mapping wanted (1:1, full),
+ * bias is a matrix corresponding to the kind of YCbCr -> RGB mapping wanted (1:1, full)
+ *
+ * To calculate procamp,
+ *    mat44 procamp, hue, saturation, brightness, contrast
+ *    procamp = brightness * (saturation * (contrast * hue))
+ * Alternatively,
+ *    procamp = saturation * (brightness * (contrast * hue))
+ *
+ * contrast
+ * [ c, 0, 0, 0]
+ * [ 0, c, 0, 0]
+ * [ 0, 0, c, 0]
+ * [ 0, 0, 0, 1]
+ *
+ * brightness
+ * [ 1, 0, 0, b]
+ * [ 0, 1, 0, 0]
+ * [ 0, 0, 1, 0]
+ * [ 0, 0, 0, 1]
+ *
+ * saturation
+ * [ 1, 0, 0, 0]
+ * [ 0, s, 0, 0]
+ * [ 0, 0, s, 0]
+ * [ 0, 0, 0, 1]
+ *
+ * hue
+ * [ 1,       0,      0, 0]
+ * [ 0,  cos(h), sin(h), 0]
+ * [ 0, -sin(h), cos(h), 0]
+ * [ 0,       0,      0, 1]
+ *
+ * procamp
+ * [ c,           0,          0, b]
+ * [ 0,  c*s*cos(h), c*s*sin(h), 0]
+ * [ 0, -c*s*sin(h), c*s*cos(h), 0]
+ * [ 0,           0,          0, 1]
+ *
+ * bias
+ * [ 1, 0, 0,  ybias]
+ * [ 0, 1, 0, cbbias]
+ * [ 0, 0, 1, crbias]
+ * [ 0, 0, 0,      1]
+ *
+ * csc
+ * [ c*cstd[ 0], c*cstd[ 1]*s*cos(h) - c*cstd[ 2]*s*sin(h), c*cstd[ 2]*s*cos(h) + c*cstd[ 1]*s*sin(h), cstd[ 3] + cstd[ 0]*(b + c*ybias) + cstd[ 1]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[ 2]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))]
+ * [ c*cstd[ 4], c*cstd[ 5]*s*cos(h) - c*cstd[ 6]*s*sin(h), c*cstd[ 6]*s*cos(h) + c*cstd[ 5]*s*sin(h), cstd[ 7] + cstd[ 4]*(b + c*ybias) + cstd[ 5]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[ 6]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))]
+ * [ c*cstd[ 8], c*cstd[ 9]*s*cos(h) - c*cstd[10]*s*sin(h), c*cstd[10]*s*cos(h) + c*cstd[ 9]*s*sin(h), cstd[11] + cstd[ 8]*(b + c*ybias) + cstd[ 9]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[10]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))]
+ * [ c*cstd[12], c*cstd[13]*s*cos(h) - c*cstd[14]*s*sin(h), c*cstd[14]*s*cos(h) + c*cstd[13]*s*sin(h), cstd[15] + cstd[12]*(b + c*ybias) + cstd[13]*(c*cbbias*s*cos(h) + c*crbias*s*sin(h)) + cstd[14]*(c*crbias*s*cos(h) - c*cbbias*s*sin(h))]
+ */
+
+/*
+ * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [16,235]
+ */
+static const float bt_601[16] =
+{
+   1.0f,  0.0f,    1.371f, 0.0f, /* R row: factors for Y, Cb, Cr and the constant term */
+   1.0f, -0.336f, -0.698f, 0.0f, /* G row */
+   1.0f,  1.732f,  0.0f,   0.0f, /* B row */
+   0.0f,  0.0f,    0.0f,   1.0f  /* homogeneous row */
+};
+
+/*
+ * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [0,255]
+ */
+static const float bt_601_full[16] =
+{
+   1.164f,  0.0f,    1.596f, 0.0f, /* R row: factors for Y, Cb, Cr and the constant term */
+   1.164f, -0.391f, -0.813f, 0.0f, /* G row */
+   1.164f,  2.018f,  0.0f,   0.0f, /* B row */
+   0.0f,    0.0f,    0.0f,   1.0f  /* homogeneous row */
+};
+
+/*
+ * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [16,235]
+ */
+static const float bt_709[16] =
+{
+   1.0f,  0.0f,    1.540f, 0.0f, /* R row: factors for Y, Cb, Cr and the constant term */
+   1.0f, -0.183f, -0.459f, 0.0f, /* G row */
+   1.0f,  1.816f,  0.0f,   0.0f, /* B row */
+   0.0f,  0.0f,    0.0f,   1.0f  /* homogeneous row */
+};
+
+/*
+ * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where:
+ * Y is in [16,235], Cb and Cr are in [16,240]
+ * R, G, and B are in [0,255]
+ */
+static const float bt_709_full[16] =
+{
+   1.164f,  0.0f,    1.793f, 0.0f, /* R row: factors for Y, Cb, Cr and the constant term */
+   1.164f, -0.213f, -0.534f, 0.0f, /* G row */
+   1.164f,  2.115f,  0.0f,   0.0f, /* B row */
+   0.0f,    0.0f,    0.0f,   1.0f  /* homogeneous row */
+};
+
+static const float identity[16] = /* copied verbatim by vl_csc_get_matrix for VL_CSC_COLOR_STANDARD_IDENTITY */
+{
+   1.0f, 0.0f, 0.0f, 0.0f,
+   0.0f, 1.0f, 0.0f, 0.0f,
+   0.0f, 0.0f, 1.0f, 0.0f,
+   0.0f, 0.0f, 0.0f, 1.0f
+};
+
+const struct vl_procamp vl_default_procamp = { /* used by vl_csc_get_matrix when its procamp argument is NULL */
+   .contrast = 1.0f,   /* gain of 1: no change */
+   .saturation = 1.0f, /* chroma gain of 1: no change */
+   .brightness = 0.0f, /* no offset */
+   .hue = 0.0f         /* no rotation */
+};
+
+void vl_csc_get_matrix(enum VL_CSC_COLOR_STANDARD cs,
+                       struct vl_procamp *procamp,
+                       bool full_range,
+                       float *matrix)
+{
+   /*
+    * Writes the 16 floats of the "csc" matrix documented at the top of this file into matrix.
+    * procamp may be NULL (vl_default_procamp is used); full_range picks the *_full tables and the luma bias.
+    */
+   float ybias = full_range ? -16.0f/255.0f : 0.0f;
+   float cbbias = -128.0f/255.0f;
+   float crbias = -128.0f/255.0f;
+   const struct vl_procamp *p = procamp ? procamp : &vl_default_procamp;
+   float c = p->contrast;
+   float s = p->saturation;
+   float b = p->brightness;
+   float h = p->hue;
+   const float *cstd;
+
+   assert(matrix);
+
+   switch (cs) {
+      case VL_CSC_COLOR_STANDARD_BT_601:
+         cstd = full_range ? &bt_601_full[0] : &bt_601[0];
+         break;
+      case VL_CSC_COLOR_STANDARD_BT_709:
+         cstd = full_range ? &bt_709_full[0] : &bt_709[0];
+         break;
+      case VL_CSC_COLOR_STANDARD_IDENTITY:
+      default:
+         assert(cs == VL_CSC_COLOR_STANDARD_IDENTITY);
+         memcpy(matrix, &identity[0], sizeof(float) * 16);
+         return;
+   }
+
+   /* every row uses the single precision cosf()/sinf(); all operands are floats */
+   matrix[ 0] = c*cstd[ 0];
+   matrix[ 1] = c*cstd[ 1]*s*cosf(h) - c*cstd[ 2]*s*sinf(h);
+   matrix[ 2] = c*cstd[ 2]*s*cosf(h) + c*cstd[ 1]*s*sinf(h);
+   matrix[ 3] = cstd[ 3] + cstd[ 0]*(b + c*ybias) + cstd[ 1]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[ 2]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h));
+   matrix[ 4] = c*cstd[ 4];
+   matrix[ 5] = c*cstd[ 5]*s*cosf(h) - c*cstd[ 6]*s*sinf(h);
+   matrix[ 6] = c*cstd[ 6]*s*cosf(h) + c*cstd[ 5]*s*sinf(h);
+   matrix[ 7] = cstd[ 7] + cstd[ 4]*(b + c*ybias) + cstd[ 5]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[ 6]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h));
+   matrix[ 8] = c*cstd[ 8];
+   matrix[ 9] = c*cstd[ 9]*s*cosf(h) - c*cstd[10]*s*sinf(h);
+   matrix[10] = c*cstd[10]*s*cosf(h) + c*cstd[ 9]*s*sinf(h);
+   matrix[11] = cstd[11] + cstd[ 8]*(b + c*ybias) + cstd[ 9]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[10]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h));
+   matrix[12] = c*cstd[12];
+   matrix[13] = c*cstd[13]*s*cosf(h) - c*cstd[14]*s*sinf(h);
+   matrix[14] = c*cstd[14]*s*cosf(h) + c*cstd[13]*s*sinf(h);
+   matrix[15] = cstd[15] + cstd[12]*(b + c*ybias) + cstd[13]*(c*cbbias*s*cosf(h) + c*crbias*s*sinf(h)) + cstd[14]*(c*crbias*s*cosf(h) - c*cbbias*s*sinf(h));
+}
diff --git a/src/gallium/auxiliary/vl/vl_csc.h b/src/gallium/auxiliary/vl/vl_csc.h
new file mode 100644
index 00000000000..9b73fb3aef2
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_csc.h
@@ -0,0 +1,55 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_csc_h
+#define vl_csc_h
+
+#include <pipe/p_compiler.h>
+
+struct vl_procamp
+{
+   float brightness; /* offset "b" of the brightness matrix in vl_csc.c; 0.0f in vl_default_procamp */
+   float contrast;   /* gain "c" applied to all three components; 1.0f in vl_default_procamp */
+   float saturation; /* gain "s" applied to the chroma components; 1.0f in vl_default_procamp */
+   float hue;        /* angle "h" handed to cosf()/sinf(), i.e. in radians; 0.0f in vl_default_procamp */
+};
+
+enum VL_CSC_COLOR_STANDARD
+{
+   VL_CSC_COLOR_STANDARD_IDENTITY, /* vl_csc_get_matrix returns the identity matrix and ignores procamp */
+   VL_CSC_COLOR_STANDARD_BT_601,   /* selects the bt_601 / bt_601_full table */
+   VL_CSC_COLOR_STANDARD_BT_709    /* selects the bt_709 / bt_709_full table */
+};
+
+extern const struct vl_procamp vl_default_procamp; /* contrast = saturation = 1.0f, brightness = hue = 0.0f */
+
+void vl_csc_get_matrix(enum VL_CSC_COLOR_STANDARD cs, /* color standard selecting the base matrix */
+                       struct vl_procamp *procamp,    /* may be NULL: vl_default_procamp is used instead */
+                       bool full_range,               /* true selects the *_full tables and a -16/255 luma bias */
+                       float *matrix);                /* out: receives 16 floats, must not be NULL */
+
+#endif /* vl_csc_h */
diff --git a/src/gallium/auxiliary/vl/vl_decoder.c b/src/gallium/auxiliary/vl/vl_decoder.c
new file mode 100644
index 00000000000..fac03359a0f
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_decoder.c
@@ -0,0 +1,77 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <pipe/p_video_decoder.h>
+
+#include <util/u_video.h>
+
+#include "vl_decoder.h"
+#include "vl_mpeg12_decoder.h"
+
+bool
+vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile)
+{
+   /*
+    * Only profiles that reduce to PIPE_VIDEO_CODEC_MPEG12 are reported as
+    * supported; screen is merely checked for being non-NULL.
+    */
+   assert(screen);
+
+   return u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12;
+}
+
+struct pipe_video_decoder *
+vl_create_decoder(struct pipe_context *pipe,
+                  enum pipe_video_profile profile,
+                  enum pipe_video_entrypoint entrypoint,
+                  enum pipe_video_chroma_format chroma_format,
+                  unsigned width, unsigned height)
+{
+   unsigned buffer_width, buffer_height;
+   bool pot_buffers;
+
+   assert(pipe);
+   assert(width > 0 && height > 0);
+   /* without PIPE_VIDEO_CAP_NPOT_TEXTURES the buffer is rounded up to powers of two */
+   pot_buffers = !pipe->screen->get_video_param
+   (
+      pipe->screen,
+      profile,
+      PIPE_VIDEO_CAP_NPOT_TEXTURES
+   );
+   /* otherwise it only has to be aligned to whole macroblocks */
+   buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
+   buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
+
+   switch (u_reduce_video_profile(profile)) {
+      case PIPE_VIDEO_CODEC_MPEG12:
+         return vl_create_mpeg12_decoder(pipe, profile, entrypoint, chroma_format, buffer_width, buffer_height);
+      default:
+         break; /* no decoder for this codec: fall through to the single NULL return */
+   }
+   return NULL;
+}
diff --git a/src/gallium/auxiliary/vl/vl_decoder.h b/src/gallium/auxiliary/vl/vl_decoder.h
new file mode 100644
index 00000000000..0e9280dbfa2
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_decoder.h
@@ -0,0 +1,50 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_decoder_h
+#define vl_decoder_h
+
+#include <pipe/p_video_decoder.h>
+
+/**
+ * check if a given profile is supported with shader based decoding (true only for profiles reducing to PIPE_VIDEO_CODEC_MPEG12)
+ */
+bool
+vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile);
+
+/**
+ * standard implementation of pipe->create_video_decoder; returns NULL for codecs other than PIPE_VIDEO_CODEC_MPEG12
+ */
+struct pipe_video_decoder *
+vl_create_decoder(struct pipe_context *pipe,
+                  enum pipe_video_profile profile,
+                  enum pipe_video_entrypoint entrypoint,
+                  enum pipe_video_chroma_format chroma_format,
+                  unsigned width, unsigned height); /* both asserted > 0; rounded up internally to the buffer size */
+
+#endif /* vl_decoder_h */
diff --git a/src/gallium/auxiliary/vl/vl_defines.h b/src/gallium/auxiliary/vl/vl_defines.h
new file mode 100644
index 00000000000..7568db027e6
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_defines.h
@@ -0,0 +1,41 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_defines_h
+#define vl_defines_h
+
+/* constants usually used with all known codecs */
+#define MACROBLOCK_WIDTH 16
+#define MACROBLOCK_HEIGHT 16
+
+#define BLOCK_WIDTH 8
+#define BLOCK_HEIGHT 8
+
+#define VL_MAX_PLANES 3
+#define VL_MAX_REF_FRAMES 2
+
+#endif /* vl_defines_h */
diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
new file mode 100644
index 00000000000..75e76c09f63
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -0,0 +1,860 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+
+#include <pipe/p_context.h>
+#include <pipe/p_screen.h>
+
+#include <util/u_draw.h>
+#include <util/u_sampler.h>
+
+#include <tgsi/tgsi_ureg.h>
+
+#include "vl_defines.h"
+#include "vl_types.h"
+#include "vl_vertex_buffers.h"
+#include "vl_idct.h"
+
+enum VS_OUTPUT
+{
+   VS_O_VPOS,    /* index used for the TGSI_SEMANTIC_POSITION output */
+   VS_O_L_ADDR0, /* generic output: first address of the "l" (left) fetch */
+   VS_O_L_ADDR1, /* generic output: second address of the "l" (left) fetch */
+   VS_O_R_ADDR0, /* generic output: first address of the "r" (right) fetch */
+   VS_O_R_ADDR1  /* generic output: second address of the "r" (right) fetch */
+};
+
+/**
+ * The 8x8 DCT matrix, stored as the raw 32-bit patterns of single precision floats. Equal to the following equation:
+ * for (i = 0; i < 8; ++i)
+ *    for (j = 0; j < 8; ++j)
+ *       if (i == 0) const_matrix[i][j] = 1.0f / sqrtf(8.0f);
+ *       else const_matrix[i][j] = sqrtf(2.0f / 8.0f) * cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f));
+ */
+static const uint32_t const_matrix[8][8] = {
+   { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 },
+   { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4, 0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf },
+   { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e, 0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f },
+   { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd, 0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 },
+   { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1, 0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 },
+   { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32, 0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 },
+   { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07, 0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 },
+   { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0, 0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 },
+};
+
+static void
+calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
+          struct ureg_src tc, struct ureg_src start, bool right_side,
+          bool transposed, float size)
+{
+   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
+   unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X;
+
+   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
+   unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y;
+
+   /*
+    * addr[0..1].(start) = right_side ? start.y : start.x
+    * addr[0..1].(tc) = right_side ? tc.x : tc.y
+    * addr[0..1].z is not written by this helper
+    * addr[1].(start) += 1.0f / size
+    */
+   ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start));
+   ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc));
+
+   ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size));
+   ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc));
+}
+
+static void
+increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
+               struct ureg_src saddr[2], bool right_side, bool transposed,
+               int pos, float size)
+{
+   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
+   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
+
+   /*
+    * daddr[0..1].(start) = saddr[0..1].(start)
+    * daddr[0..1].(tc) = saddr[0..1].(tc) + pos / size
+    */
+
+   ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
+   ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
+   ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
+   ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
+}
+
+static void
+fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2],
+           struct ureg_src sampler, bool resource3d)
+{ /* emits two TEX instructions: m[0] = sampler at addr[0], m[1] = sampler at addr[1]; resource3d selects the 3D texture target instead of 2D */
+   ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler);
+   ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler);
+}
+
+static void
+matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
+{
+   struct ureg_dst tmp;
+
+   tmp = ureg_DECL_temporary(shader);
+
+   /*
+    * tmp.x = dot4(l[0], r[0]), tmp.y = dot4(l[1], r[1])
+    * dst = tmp.x + tmp.y
+    */
+   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
+   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));
+   ureg_ADD(shader, dst,
+      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),
+      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
+
+   ureg_release_temporary(shader, tmp);
+}
+
+static void *
+create_mismatch_vert_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+   struct ureg_src vrect, vpos;
+   struct ureg_src scale;
+   struct ureg_dst t_tex;
+   struct ureg_dst o_vpos, o_addr[2];
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT); /* declared, but not referenced again in this shader */
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+   t_tex = ureg_DECL_temporary(shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+
+   o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
+   o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
+
+   /*
+    * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (idct->buffer_width, idct->buffer_height)
+    *
+    * o_vpos.xy = vpos * scale + scale
+    * o_vpos.zw = 1.0
+    *
+    * o_addr = calc_addr(tc = start = vpos * scale, ...)
+    *
+    */
+
+   scale = ureg_imm2f(shader,
+      (float)BLOCK_WIDTH / idct->buffer_width,
+      (float)BLOCK_HEIGHT / idct->buffer_height);
+
+   ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale);
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
+
+   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale);
+   calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4);
+
+   ureg_release_temporary(shader, t_tex);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe); /* hands shader over to the helper; its result is returned as-is */
+}
+
+static void *
+create_mismatch_frag_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+
+   struct ureg_src addr[2];
+
+   struct ureg_dst m[8][2];
+   struct ureg_dst fragment;
+
+   unsigned i;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
+   addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   for (i = 0; i < 8; ++i) { /* two temporaries per step, 16 in total */
+      m[i][0] = ureg_DECL_temporary(shader);
+      m[i][1] = ureg_DECL_temporary(shader);
+   }
+
+   for (i = 0; i < 8; ++i) { /* m[i] = addr with its tc component advanced by i / buffer_height */
+      increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height);
+   }
+
+   for (i = 0; i < 8; ++i) { /* replace each address pair by the texels fetched from sampler 0 */
+      struct ureg_src s_addr[2] = { ureg_src(m[i][0]), ureg_src(m[i][1]) };
+      fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false);
+   }
+
+   for (i = 1; i < 8; ++i) { /* accumulate the fetches of all eight steps in m[0] */
+      ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0]));
+      ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1]));
+   }
+
+   ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1]));
+   ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14));
+
+   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14));
+   ureg_FRC(shader, m[0][0], ureg_src(m[0][0]));
+   ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0])));
+
+   ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])),
+            ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15)));
+   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]),
+            ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X));
+
+   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1])); /* xyz: m[7][1] passed through unchanged */
+   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1])); /* w: m[7][1].w plus the term left in m[0][0].w */
+
+   for (i = 0; i < 8; ++i) {
+      ureg_release_temporary(shader, m[i][0]);
+      ureg_release_temporary(shader, m[i][1]);
+   }
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+static void *
+create_stage1_vert_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+   struct ureg_src vrect, vpos;
+   struct ureg_src scale;
+   struct ureg_dst t_tex, t_start;
+   struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+   t_tex = ureg_DECL_temporary(shader);
+   t_start = ureg_DECL_temporary(shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+
+   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
+   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
+
+   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0);
+   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1);
+
+   /*
+    * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (idct->buffer_width, idct->buffer_height)
+    *
+    * t_tex = (vpos + vrect) * scale
+    * o_vpos.xy = t_tex
+    * o_vpos.zw = 1.0
+    *
+    * o_l_addr = calc_addr(tc = t_tex, start = vpos * scale, ...)
+    * o_r_addr = calc_addr(tc = vrect, start = 0.0, ...)
+    *
+    */
+
+   scale = ureg_imm2f(shader,
+      (float)BLOCK_WIDTH / idct->buffer_width,
+      (float)BLOCK_HEIGHT / idct->buffer_height);
+
+   ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);
+   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
+
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
+
+   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
+
+   calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
+   calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
+
+   ureg_release_temporary(shader, t_tex);
+   ureg_release_temporary(shader, t_start);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+static void *
+create_stage1_frag_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+
+   struct ureg_src l_addr[2], r_addr[2];
+
+   struct ureg_dst l[4][2], r[2];
+   struct ureg_dst fragment[idct->nr_of_render_targets]; /* variable length array: one color output per render target */
+
+   int i, j;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
+   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
+
+   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
+   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
+
+   for (i = 0; i < idct->nr_of_render_targets; ++i)
+       fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
+
+   for (i = 0; i < 4; ++i) {
+      l[i][0] = ureg_DECL_temporary(shader);
+      l[i][1] = ureg_DECL_temporary(shader);
+   }
+
+   r[0] = ureg_DECL_temporary(shader);
+   r[1] = ureg_DECL_temporary(shader);
+
+   for (i = 0; i < 4; ++i) { /* l[i] = l_addr with its tc component advanced by (i - 2) / buffer_height */
+      increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height);
+   }
+
+   for (i = 0; i < 4; ++i) { /* replace each address pair by the texels fetched from sampler 0 */
+      struct ureg_src s_addr[2] = { ureg_src(l[i][0]), ureg_src(l[i][1]) };
+      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false);
+   }
+
+   for (i = 0; i < idct->nr_of_render_targets; ++i) { /* per render target: fetch r from sampler 1, then write four products */
+      increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, BLOCK_HEIGHT);
+
+      struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) };
+      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false);
+
+      for (j = 0; j < 4; ++j) { /* writes component j of fragment[i]; relies on the TGSI_WRITEMASK_{X,Y,Z,W} bits being consecutive */
+         matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
+      }
+   }
+
+   for (i = 0; i < 4; ++i) {
+      ureg_release_temporary(shader, l[i][0]);
+      ureg_release_temporary(shader, l[i][1]);
+   }
+   ureg_release_temporary(shader, r[0]);
+   ureg_release_temporary(shader, r[1]);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+void
+vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader,
+                           unsigned first_output, struct ureg_dst tex)
+{ /* appends the stage 2 address outputs to a caller-owned shader; tex.z is written here and tex is read as the "r" texture coordinate */
+   struct ureg_src vrect, vpos;
+   struct ureg_src scale;
+   struct ureg_dst t_start;
+   struct ureg_dst o_l_addr[2], o_r_addr[2];
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+   t_start = ureg_DECL_temporary(shader); /* NOTE(review): no matching ureg_release_temporary in this function */
+
+   --first_output; /* VS_O_L_ADDR0 is 1 (VS_O_VPOS is 0), so first_output + VS_O_L_ADDR0 equals the caller's first_output */
+
+   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0);
+   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1);
+
+   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0);
+   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1);
+
+   scale = ureg_imm2f(shader,
+      (float)BLOCK_WIDTH / idct->buffer_width,
+      (float)BLOCK_HEIGHT / idct->buffer_height);
+
+   ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z),
+      ureg_scalar(vrect, TGSI_SWIZZLE_X),
+      ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets)); /* NOTE(review): integer division if nr_of_render_targets is an integer type -- confirm it always divides BLOCK_WIDTH */
+   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
+
+   calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
+   calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
+
+   ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex)); /* calc_addr leaves .z untouched, so it is forwarded here */
+   ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex));
+}
+
+/*
+ * Append the fragment shader part of the second IDCT stage to "shader".
+ *
+ * The program is neither created nor ended here; both are left to the
+ * caller. The four generic inputs use the same (first_input - 1) + VS_O_*
+ * numbering as the outputs of vl_idct_stage2_vert_shader().
+ *
+ * idct:        not used by the code in this function
+ * shader:      program the declarations and instructions are appended to
+ * first_input: generic input index the VS_O_*_ADDR* offsets are based on
+ * fragment:    destination register handed to matrix_mul()
+ */
+void
+vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
+                           unsigned first_input, struct ureg_dst fragment)
+{
+   struct ureg_src l_addr[2], r_addr[2];
+
+   struct ureg_dst l[2], r[2];
+
+   /* same index adjustment as in the matching vertex shader */
+   --first_input;
+
+   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
+   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
+
+   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
+   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
+
+   l[0] = ureg_DECL_temporary(shader);
+   l[1] = ureg_DECL_temporary(shader);
+   r[0] = ureg_DECL_temporary(shader);
+   r[1] = ureg_DECL_temporary(shader);
+
+   /* the left operand is fetched through sampler 1, the right one through sampler 0 */
+   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false);
+   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true);
+
+   /* presumably combines the fetched row and column into one output value -- see matrix_mul() */
+   matrix_mul(shader, fragment, l, r);
+
+   ureg_release_temporary(shader, l[0]);
+   ureg_release_temporary(shader, l[1]);
+   ureg_release_temporary(shader, r[0]);
+   ureg_release_temporary(shader, r[1]);
+}
+
+/*
+ * Create the vertex and fragment shaders of the mismatch pass and of the
+ * first IDCT stage and store them in idct.
+ *
+ * Returns true on success. On failure every shader created so far is
+ * deleted again, in reverse order of creation, and false is returned.
+ */
+static bool
+init_shaders(struct vl_idct *idct)
+{
+   idct->vs_mismatch = create_mismatch_vert_shader(idct);
+   if (!idct->vs_mismatch)
+      goto error_vs_mismatch;
+
+   idct->fs_mismatch = create_mismatch_frag_shader(idct);
+   if (!idct->fs_mismatch)
+      goto error_fs_mismatch;
+
+   idct->vs = create_stage1_vert_shader(idct);
+   if (!idct->vs)
+      goto error_vs;
+
+   idct->fs = create_stage1_frag_shader(idct);
+   if (!idct->fs)
+      goto error_fs;
+
+   return true;
+
+   /* each label undoes everything that succeeded before the failing step */
+error_fs:
+   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
+
+error_vs:
+   idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch);
+
+error_fs_mismatch:
+   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
+
+error_vs_mismatch:
+   return false;
+}
+
+/* Delete the four shader objects that init_shaders() stores in idct. */
+static void
+cleanup_shaders(struct vl_idct *idct)
+{
+   struct pipe_context *ctx = idct->pipe;
+
+   /* mismatch pass */
+   ctx->delete_vs_state(ctx, idct->vs_mismatch);
+   ctx->delete_fs_state(ctx, idct->fs_mismatch);
+
+   /* first stage */
+   ctx->delete_vs_state(ctx, idct->vs);
+   ctx->delete_fs_state(ctx, idct->fs);
+}
+
+/*
+ * Create the rasterizer state, the blend state and the two sampler states
+ * and store them in idct.
+ *
+ * Returns true on success. On failure the objects created so far are
+ * deleted again, in reverse order of creation, and false is returned.
+ */
+static bool
+init_state(struct vl_idct *idct)
+{
+   struct pipe_blend_state blend;
+   struct pipe_rasterizer_state rs_state;
+   struct pipe_sampler_state sampler;
+   unsigned i;
+
+   assert(idct);
+
+   memset(&rs_state, 0, sizeof(rs_state));
+   rs_state.point_size = 1;
+   rs_state.gl_rasterization_rules = true;
+   idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
+   if (!idct->rs_state)
+      goto error_rs_state;
+
+   memset(&blend, 0, sizeof blend);
+
+   blend.independent_blend_enable = 0;
+   blend.rt[0].blend_enable = 0;
+   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.logicop_enable = 0;
+   blend.logicop_func = PIPE_LOGICOP_CLEAR;
+   /* Needed to allow color writes to FB, even if blending disabled */
+   blend.rt[0].colormask = PIPE_MASK_RGBA;
+   blend.dither = 0;
+   idct->blend = idct->pipe->create_blend_state(idct->pipe, &blend);
+   if (!idct->blend)
+      goto error_blend;
+
+   /* both samplers use the same settings: repeat wrapping, nearest filtering */
+   for (i = 0; i < 2; ++i) {
+      memset(&sampler, 0, sizeof(sampler));
+      sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
+      sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
+      sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
+      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+      sampler.compare_func = PIPE_FUNC_ALWAYS;
+      sampler.normalized_coords = 1;
+      idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
+      if (!idct->samplers[i])
+         goto error_samplers;
+   }
+
+   return true;
+
+error_samplers:
+   /* samplers[i] is the one that failed, only the ones before it exist */
+   while (i-- > 0)
+      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
+
+   idct->pipe->delete_blend_state(idct->pipe, idct->blend);
+
+error_blend:
+   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
+
+error_rs_state:
+   return false;
+}
+
+/* Delete the sampler, rasterizer and blend state objects stored in idct. */
+static void
+cleanup_state(struct vl_idct *idct)
+{
+   struct pipe_context *ctx = idct->pipe;
+   unsigned s;
+
+   for (s = 0; s < 2; ++s) {
+      ctx->delete_sampler_state(ctx, idct->samplers[s]);
+   }
+
+   ctx->delete_rasterizer_state(ctx, idct->rs_state);
+   ctx->delete_blend_state(ctx, idct->blend);
+}
+
+/*
+ * Set up the framebuffer and viewport state of the mismatch pass, which
+ * renders into the texture behind the buffer's source sampler view.
+ *
+ * Returns false if the surface for that texture could not be created,
+ * true otherwise.
+ */
+static bool
+init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+{
+   struct pipe_resource *tex;
+   struct pipe_surface surf_templ;
+
+   assert(idct && buffer);
+
+   tex = buffer->sampler_views.individual.source->texture;
+
+   buffer->fb_state_mismatch.width = tex->width0;
+   buffer->fb_state_mismatch.height = tex->height0;
+   buffer->fb_state_mismatch.nr_cbufs = 1;
+
+   memset(&surf_templ, 0, sizeof(surf_templ));
+   surf_templ.format = tex->format;
+   surf_templ.u.tex.first_layer = 0;
+   surf_templ.u.tex.last_layer = 0;
+   surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ);
+   /* report the failure instead of leaving a NULL color buffer behind */
+   if (!buffer->fb_state_mismatch.cbufs[0])
+      return false;
+
+   /* the viewport covers the whole source texture */
+   buffer->viewport_mismatch.scale[0] = tex->width0;
+   buffer->viewport_mismatch.scale[1] = tex->height0;
+   buffer->viewport_mismatch.scale[2] = 1;
+   buffer->viewport_mismatch.scale[3] = 1;
+
+   return true;
+}
+
+/*
+ * Drop the mismatch pass surface and the reference to the source sampler
+ * view held by buffer.
+ */
+static void
+cleanup_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+{
+   struct pipe_surface **mismatch_surface;
+   struct pipe_sampler_view **source_view;
+
+   assert(idct && buffer);
+
+   mismatch_surface = &buffer->fb_state_mismatch.cbufs[0];
+   source_view = &buffer->sampler_views.individual.source;
+
+   pipe_surface_reference(mismatch_surface, NULL);
+   pipe_sampler_view_reference(source_view, NULL);
+}
+
+/*
+ * Set up the framebuffer and viewport state of the first IDCT stage.
+ *
+ * One surface is created per render target; render target n uses layer n
+ * of the texture behind the intermediate sampler view. Returns true on
+ * success. If a surface cannot be created, all color buffer slots are
+ * released again and false is returned.
+ */
+static bool
+init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+{
+   struct pipe_context *ctx;
+   struct pipe_resource *res;
+   struct pipe_surface templ;
+   unsigned rt, k;
+
+   assert(idct && buffer);
+
+   ctx = idct->pipe;
+   res = buffer->sampler_views.individual.intermediate->texture;
+
+   buffer->fb_state.width = res->width0;
+   buffer->fb_state.height = res->height0;
+   buffer->fb_state.nr_cbufs = idct->nr_of_render_targets;
+
+   for (rt = 0; rt < idct->nr_of_render_targets; ++rt) {
+      memset(&templ, 0, sizeof(templ));
+      templ.format = res->format;
+      templ.u.tex.first_layer = rt;
+      templ.u.tex.last_layer = rt;
+      templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+      buffer->fb_state.cbufs[rt] = ctx->create_surface(ctx, res, &templ);
+
+      if (!buffer->fb_state.cbufs[rt]) {
+         /* give up: release every slot, created or not */
+         for (k = 0; k < idct->nr_of_render_targets; ++k)
+            pipe_surface_reference(&buffer->fb_state.cbufs[k], NULL);
+
+         return false;
+      }
+   }
+
+   /* the viewport covers the whole intermediate texture */
+   buffer->viewport.scale[0] = res->width0;
+   buffer->viewport.scale[1] = res->height0;
+   buffer->viewport.scale[2] = 1;
+   buffer->viewport.scale[3] = 1;
+
+   return true;
+}
+
+/*
+ * Drop the first stage surfaces and the reference to the intermediate
+ * sampler view held by buffer.
+ */
+static void
+cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+{
+   unsigned rt;
+
+   assert(idct && buffer);
+
+   for (rt = 0; rt < idct->nr_of_render_targets; ++rt) {
+      struct pipe_surface **slot = &buffer->fb_state.cbufs[rt];
+
+      pipe_surface_reference(slot, NULL);
+   }
+
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
+}
+
+/*
+ * Create a float texture holding the transposed, scaled const_matrix and
+ * return a sampler view for it.
+ *
+ * pipe:  context used to create and fill the texture
+ * scale: factor every matrix element is multiplied with
+ *
+ * Returns the new sampler view, or NULL if the texture, the transfer, the
+ * mapping or the view could not be created. The local reference to the
+ * texture is dropped before returning.
+ */
+struct pipe_sampler_view *
+vl_idct_upload_matrix(struct pipe_context *pipe, float scale)
+{
+   struct pipe_resource tex_templ, *matrix;
+   struct pipe_sampler_view sv_templ, *sv;
+   struct pipe_transfer *buf_transfer;
+   unsigned i, j, pitch;
+   float *f;
+
+   struct pipe_box rect =
+   {
+      0, 0, 0,
+      BLOCK_WIDTH / 4,
+      BLOCK_HEIGHT,
+      1
+   };
+
+   assert(pipe);
+
+   /* NOTE(review): 2 x 8 presumably equals the BLOCK_WIDTH / 4 x BLOCK_HEIGHT box above -- confirm */
+   memset(&tex_templ, 0, sizeof(tex_templ));
+   tex_templ.target = PIPE_TEXTURE_2D;
+   tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   tex_templ.last_level = 0;
+   tex_templ.width0 = 2;
+   tex_templ.height0 = 8;
+   tex_templ.depth0 = 1;
+   tex_templ.array_size = 1;
+   tex_templ.usage = PIPE_USAGE_IMMUTABLE;
+   tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
+   tex_templ.flags = 0;
+
+   matrix = pipe->screen->resource_create(pipe->screen, &tex_templ);
+   if (!matrix)
+      goto error_matrix;
+
+   buf_transfer = pipe->get_transfer
+   (
+      pipe, matrix,
+      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &rect
+   );
+   if (!buf_transfer)
+      goto error_transfer;
+
+   /* row pitch in floats rather than bytes */
+   pitch = buf_transfer->stride / sizeof(float);
+
+   f = pipe->transfer_map(pipe, buf_transfer);
+   if (!f)
+      goto error_map;
+
+   for(i = 0; i < BLOCK_HEIGHT; ++i)
+      for(j = 0; j < BLOCK_WIDTH; ++j)
+         // transpose and scale
+         f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale;
+
+   pipe->transfer_unmap(pipe, buf_transfer);
+   pipe->transfer_destroy(pipe, buf_transfer);
+
+   memset(&sv_templ, 0, sizeof(sv_templ));
+   u_sampler_view_default_template(&sv_templ, matrix, matrix->format);
+   sv = pipe->create_sampler_view(pipe, matrix, &sv_templ);
+   pipe_resource_reference(&matrix, NULL);
+   /*
+    * the transfer is already destroyed and the texture reference already
+    * dropped at this point, so there is nothing left to unwind
+    */
+   if (!sv)
+      goto error_matrix;
+
+   return sv;
+
+error_map:
+   pipe->transfer_destroy(pipe, buf_transfer);
+
+error_transfer:
+   pipe_resource_reference(&matrix, NULL);
+
+error_matrix:
+   return NULL;
+}
+
+/*
+ * Initialize an idct instance.
+ *
+ * idct:                 instance to fill in
+ * pipe:                 context used for all state objects and draws
+ * buffer_width/height:  stored in idct and used when building the shaders
+ * nr_of_render_targets: number of color buffers of the first stage
+ * matrix, transpose:    sampler views; idct takes its own reference to both
+ *
+ * Returns true on success. On failure everything created here is released
+ * again, including the two sampler view references, and false is returned.
+ *
+ * NOTE(review): pipe_sampler_view_reference() presumably releases whatever
+ * idct->matrix / idct->transpose held before, so both should be NULL (or
+ * valid) on entry -- confirm against the callers.
+ */
+bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
+                  unsigned buffer_width, unsigned buffer_height,
+                  unsigned nr_of_render_targets,
+                  struct pipe_sampler_view *matrix,
+                  struct pipe_sampler_view *transpose)
+{
+   assert(idct && pipe);
+   assert(matrix && transpose);
+
+   idct->pipe = pipe;
+   idct->buffer_width = buffer_width;
+   idct->buffer_height = buffer_height;
+   idct->nr_of_render_targets = nr_of_render_targets;
+
+   pipe_sampler_view_reference(&idct->matrix, matrix);
+   pipe_sampler_view_reference(&idct->transpose, transpose);
+
+   if(!init_shaders(idct))
+      goto error_shaders;
+
+   if(!init_state(idct))
+      goto error_state;
+
+   return true;
+
+error_state:
+   cleanup_shaders(idct);
+
+error_shaders:
+   /* give back the references taken above */
+   pipe_sampler_view_reference(&idct->matrix, NULL);
+   pipe_sampler_view_reference(&idct->transpose, NULL);
+
+   return false;
+}
+
+/*
+ * Release everything an idct instance holds: its shaders, its state
+ * objects and its references to the matrix and transpose sampler views.
+ */
+void
+vl_idct_cleanup(struct vl_idct *idct)
+{
+   struct pipe_sampler_view **held[2];
+   unsigned v;
+
+   cleanup_shaders(idct);
+   cleanup_state(idct);
+
+   held[0] = &idct->matrix;
+   held[1] = &idct->transpose;
+
+   for (v = 0; v < 2; ++v)
+      pipe_sampler_view_reference(held[v], NULL);
+}
+
+/*
+ * Initialize a buffer for use with the given idct instance.
+ *
+ * idct:         instance the buffer belongs to
+ * buffer:       structure to fill in; it is zeroed first
+ * source:       sampler view whose texture the mismatch pass renders to
+ * intermediate: sampler view whose texture the first stage renders to
+ *
+ * The buffer takes its own reference to source, intermediate and to the
+ * matrix and transpose views of idct. Returns true on success. On failure
+ * all references and surfaces acquired here are released again and false
+ * is returned.
+ */
+bool
+vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
+                    struct pipe_sampler_view *source,
+                    struct pipe_sampler_view *intermediate)
+{
+   assert(buffer && idct);
+   assert(source && intermediate);
+
+   memset(buffer, 0, sizeof(struct vl_idct_buffer));
+
+   buffer->idct = idct;
+
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate);
+
+   if (!init_source(idct, buffer))
+      goto error_source;
+
+   if (!init_intermediate(idct, buffer))
+      goto error_intermediate;
+
+   return true;
+
+error_intermediate:
+   /* init_intermediate() released its own surfaces, only the mismatch one is left */
+   pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL);
+
+error_source:
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL);
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL);
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL);
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
+
+   return false;
+}
+
+/*
+ * Release the surfaces and the four sampler view references held by a
+ * buffer.
+ */
+void
+vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer)
+{
+   struct vl_idct *owner;
+
+   assert(buffer);
+
+   owner = buffer->idct;
+
+   /* these two also drop the source and intermediate views */
+   cleanup_source(owner, buffer);
+   cleanup_intermediate(owner, buffer);
+
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL);
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL);
+}
+
+/*
+ * Run the mismatch control pass and the first IDCT stage for a buffer.
+ *
+ * buffer:        buffer whose framebuffer, viewport and stage[0] sampler
+ *                views are used
+ * num_instances: instance count passed to both instanced draws
+ */
+void
+vl_idct_flush(struct vl_idct_buffer *buffer, unsigned num_instances)
+{
+   struct vl_idct *idct;
+   struct pipe_context *ctx;
+
+   assert(buffer);
+
+   idct = buffer->idct;
+   ctx = idct->pipe;
+
+   /* state shared by both passes */
+   ctx->bind_rasterizer_state(ctx, idct->rs_state);
+   ctx->bind_blend_state(ctx, idct->blend);
+   ctx->bind_fragment_sampler_states(ctx, 2, idct->samplers);
+   ctx->set_fragment_sampler_views(ctx, 2, buffer->sampler_views.stage[0]);
+
+   /* mismatch control: one point per instance */
+   ctx->set_framebuffer_state(ctx, &buffer->fb_state_mismatch);
+   ctx->set_viewport_state(ctx, &buffer->viewport_mismatch);
+   ctx->bind_vs_state(ctx, idct->vs_mismatch);
+   ctx->bind_fs_state(ctx, idct->fs_mismatch);
+   util_draw_arrays_instanced(ctx, PIPE_PRIM_POINTS, 0, 1, 0, num_instances);
+
+   /* first stage: one quad per instance */
+   ctx->set_framebuffer_state(ctx, &buffer->fb_state);
+   ctx->set_viewport_state(ctx, &buffer->viewport);
+   ctx->bind_vs_state(ctx, idct->vs);
+   ctx->bind_fs_state(ctx, idct->fs);
+   util_draw_arrays_instanced(ctx, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
+}
+
+/*
+ * Bind the rasterizer state, the samplers and the stage[1] sampler views
+ * of a buffer in preparation for the second stage. Nothing is drawn here.
+ */
+void
+vl_idct_prepare_stage2(struct vl_idct_buffer *buffer)
+{
+   struct vl_idct *idct;
+   struct pipe_context *ctx;
+
+   assert(buffer);
+
+   idct = buffer->idct;
+   ctx = idct->pipe;
+
+   /* second stage */
+   ctx->bind_rasterizer_state(ctx, idct->rs_state);
+   ctx->bind_fragment_sampler_states(ctx, 2, idct->samplers);
+   ctx->set_fragment_sampler_views(ctx, 2, buffer->sampler_views.stage[1]);
+}
+
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
new file mode 100644
index 00000000000..98e2c795564
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -0,0 +1,121 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_idct_h
+#define vl_idct_h
+
+#include <pipe/p_state.h>
+
+#include <tgsi/tgsi_ureg.h>
+
+/* shader based inverse discrete cosine transform (IDCT)
+ * expects vl_vertex_buffers to be used as a todo list
+ */
+struct vl_idct
+{
+   /* context used for creating all state objects and for drawing */
+   struct pipe_context *pipe;
+
+   /* values handed to vl_idct_init() */
+   unsigned buffer_width;
+   unsigned buffer_height;
+   unsigned nr_of_render_targets;
+
+   /* rasterizer and blend state objects */
+   void *rs_state;
+   void *blend;
+
+   /* the two fragment sampler state objects */
+   void *samplers[2];
+
+   /* shaders of the mismatch pass and of the first stage */
+   void *vs_mismatch, *fs_mismatch;
+   void *vs, *fs;
+
+   /* sampler views handed to vl_idct_init(), which takes a reference to each */
+   struct pipe_sampler_view *matrix;
+   struct pipe_sampler_view *transpose;
+};
+
+/* a set of buffers to work with */
+struct vl_idct_buffer
+{
+   /* instance this buffer was initialized for */
+   struct vl_idct *idct;
+   
+   /* viewports of the mismatch pass and of the first stage */
+   struct pipe_viewport_state viewport_mismatch;
+   struct pipe_viewport_state viewport;
+
+   /* framebuffers of the mismatch pass and of the first stage */
+   struct pipe_framebuffer_state fb_state_mismatch;
+   struct pipe_framebuffer_state fb_state;
+
+   /*
+    * three views of the same four pointers:
+    * stage[0] is { source, matrix }, stage[1] is { intermediate, transpose }
+    */
+   union
+   {
+      struct pipe_sampler_view *all[4];
+      struct pipe_sampler_view *stage[2][2];
+      struct {
+         struct pipe_sampler_view *source, *matrix;
+         struct pipe_sampler_view *intermediate, *transpose;
+      } individual;
+   } sampler_views;
+};
+
+/* upload the idct matrix, which can be shared by all idct instances of a pipe */
+struct pipe_sampler_view *
+vl_idct_upload_matrix(struct pipe_context *pipe, float scale);
+
+void
+vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader,
+                           unsigned first_output, struct ureg_dst tex);
+
+void
+vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
+                           unsigned first_input, struct ureg_dst fragment);
+
+/* init an idct instance */
+bool
+vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
+             unsigned buffer_width, unsigned buffer_height,
+             unsigned nr_of_render_targets,
+             struct pipe_sampler_view *matrix,
+             struct pipe_sampler_view *transpose);
+
+/* destroy an idct instance */
+void
+vl_idct_cleanup(struct vl_idct *idct);
+
+/* init a buffer associated with a given idct instance */
+bool
+vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
+                    struct pipe_sampler_view *source,
+                    struct pipe_sampler_view *intermediate);
+
+/* cleanup a buffer of an idct instance */
+void
+vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer);
+
+/* flush the buffer and start rendering, vertex buffers need to be set up before calling this;
+ * num_instances is the instance count used for the draws
+ */
+void
+vl_idct_flush(struct vl_idct_buffer *buffer, unsigned num_instances);
+
+void
+vl_idct_prepare_stage2(struct vl_idct_buffer *buffer);
+
+#endif
diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c
new file mode 100644
index 00000000000..bd05205b52d
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mc.c
@@ -0,0 +1,660 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+
+#include <pipe/p_context.h>
+
+#include <util/u_sampler.h>
+#include <util/u_draw.h>
+
+#include <tgsi/tgsi_ureg.h>
+
+#include "vl_defines.h"
+#include "vl_vertex_buffers.h"
+#include "vl_mc.h"
+#include "vl_idct.h"
+
+/*
+ * Output slots of the vertex shaders in this file. VS_O_FLAGS and VS_O_VTEX
+ * reuse the values of VS_O_VTOP and VS_O_VBOTTOM: the reference shaders use
+ * the VTOP/VBOTTOM names, the ycbcr shaders the FLAGS/VTEX names.
+ */
+enum VS_OUTPUT
+{
+   VS_O_VPOS,
+   VS_O_VTOP,
+   VS_O_VBOTTOM,
+
+   VS_O_FLAGS = VS_O_VTOP,
+   VS_O_VTEX = VS_O_VBOTTOM
+};
+
+/*
+ * Emit the position calculation shared by the vertex shaders.
+ *
+ * Declares the VS_I_RECT and VS_I_VPOS inputs and the position output,
+ * writes the position and returns the temporary holding the scaled
+ * position in XY. The temporary is not released here; the callers in this
+ * file release it. The r parameter is not used.
+ */
+static struct ureg_dst
+calc_position(struct vl_mc *r, struct ureg_program *shader, struct ureg_src block_scale)
+{
+   struct ureg_src vrect, vpos;
+   struct ureg_dst t_vpos;
+   struct ureg_dst o_vpos;
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+   t_vpos = ureg_DECL_temporary(shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+
+   /*
+    * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
+    *
+    * t_vpos = (vpos + vrect) * block_scale
+    * o_vpos.xy = t_vpos
+    * o_vpos.zw = 1.0
+    */
+   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
+   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale);
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
+
+   return t_vpos;
+}
+
+/*
+ * Emit the line parity calculation shared by the fragment shaders.
+ *
+ * Declares the position input and returns a temporary whose Y component
+ * tells whether the fragment lies on an odd line. The temporary is not
+ * released here; the callers in this file release it.
+ */
+static struct ureg_dst
+calc_line(struct ureg_program *shader)
+{
+   struct ureg_dst tmp;
+   struct ureg_src pos;
+
+   tmp = ureg_DECL_temporary(shader);
+
+   pos = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS, TGSI_INTERPOLATE_LINEAR);
+
+   /*
+    * tmp.y = fraction(pos.y / 2) >= 0.5 ? 1 : 0
+    */
+   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), pos, ureg_imm1f(shader, 0.5f));
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp));
+   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
+
+   return tmp;
+}
+
+/*
+ * Build the vertex shader used for reference frames.
+ *
+ * Outputs the position plus two generic outputs (VS_O_VTOP and
+ * VS_O_VBOTTOM) derived from the two motion vector inputs. Returns the
+ * shader object, or NULL if the ureg program could not be created.
+ */
+static void *
+create_ref_vert_shader(struct vl_mc *r)
+{
+   struct ureg_program *shader;
+   struct ureg_src mv_scale;
+   struct ureg_src vrect, vmv[2];
+   struct ureg_dst t_vpos;
+   struct ureg_dst o_vpos, o_vmv[2];
+   unsigned i;
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   /* vrect is only declared here; calc_position() declares and reads it as well */
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP);
+   vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM);
+
+   t_vpos = calc_position(r, shader, ureg_imm2f(shader,
+      (float)MACROBLOCK_WIDTH / r->buffer_width,
+      (float)MACROBLOCK_HEIGHT / r->buffer_height)
+   );
+
+   /* o_vpos is written by calc_position(), not in this function */
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+   o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
+   o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
+
+   /*
+    * mv_scale.xy = 0.5 / (dst.width, dst.height);
+    * mv_scale.z = 1.0f / 4.0f
+    * mv_scale.w = 1.0f / PIPE_VIDEO_MV_WEIGHT_MAX
+    *
+    * // Apply motion vectors
+    * o_vmv[0..1].xy = vmv[0..1] * mv_scale + t_vpos
+    * o_vmv[0..1].zw = vmv[0..1] * mv_scale
+    *
+    */
+
+   mv_scale = ureg_imm4f(shader,
+      0.5f / r->buffer_width,
+      0.5f / r->buffer_height,
+      1.0f / 4.0f,
+      1.0f / PIPE_VIDEO_MV_WEIGHT_MAX);
+
+   for (i = 0; i < 2; ++i) {
+      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos));
+      ureg_MUL(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_ZW), mv_scale, vmv[i]);
+   }
+
+   ureg_release_temporary(shader, t_vpos);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, r->pipe);
+}
+
+/*
+ * Build the fragment shader used for reference frames.
+ *
+ * Picks one of the two texture coordinates depending on the line parity
+ * from calc_line(), optionally snaps its Y component, and samples
+ * sampler 0 with it. Returns the shader object, or NULL if the ureg
+ * program could not be created.
+ */
+static void *
+create_ref_frag_shader(struct vl_mc *r)
+{
+   /*
+    * NOTE(review): the expression is evaluated before the conversion to
+    * float, so the divisions truncate if the operands are integers -- confirm
+    */
+   const float y_scale =
+      r->buffer_height / 2 *
+      r->macroblock_size / MACROBLOCK_HEIGHT;
+
+   struct ureg_program *shader;
+   struct ureg_src tc[2], sampler;
+   struct ureg_dst ref, field;
+   struct ureg_dst fragment;
+   unsigned label;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
+   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);
+
+   sampler = ureg_DECL_sampler(shader, 0);
+   ref = ureg_DECL_temporary(shader);
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   field = calc_line(shader);
+
+   /*
+    * ref.xyz = field.y ? tc[1] : tc[0]
+    * fragment.w = field.y ? tc[1].w : tc[0].w
+    *
+    * // Adjust tc according to top/bottom field selection
+    * if (ref.z) {
+    *    ref.y *= y_scale
+    *    ref.y = floor(ref.y)
+    *    ref.y += ref.z
+    *    ref.y /= y_scale
+    * }
+    * fragment.xyz = tex(ref, sampler[0])
+    */
+   ureg_CMP(shader, ureg_writemask(ref, TGSI_WRITEMASK_XYZ),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            tc[1], tc[0]);
+   ureg_CMP(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            tc[1], tc[0]);
+
+   ureg_IF(shader, ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z), &label);
+
+      ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
+               ureg_src(ref), ureg_imm1f(shader, y_scale));
+      ureg_FLR(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref));
+      ureg_ADD(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
+               ureg_src(ref), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z));
+      ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
+               ureg_src(ref), ureg_imm1f(shader, 1.0f / y_scale));
+
+   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+   ureg_ENDIF(shader);
+
+   ureg_TEX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), TGSI_TEXTURE_2D, ureg_src(ref), sampler);
+
+   ureg_release_temporary(shader, ref);
+
+   ureg_release_temporary(shader, field);
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, r->pipe);
+}
+
+/*
+ * Build the vertex shader used for ycbcr blocks.
+ *
+ * vs_callback is invoked with (callback_priv, r, shader, VS_O_VTEX, t_vpos)
+ * so the caller can emit its own part of the shader. The branch on vpos.w
+ * is only emitted when r->macroblock_size equals MACROBLOCK_HEIGHT.
+ * Returns the shader object, or NULL if the ureg program could not be
+ * created.
+ */
+static void *
+create_ycbcr_vert_shader(struct vl_mc *r, vl_mc_ycbcr_vert_shader vs_callback, void *callback_priv)
+{
+   struct ureg_program *shader;
+
+   struct ureg_src vrect, vpos;
+   struct ureg_dst t_vpos, t_vtex;
+   struct ureg_dst o_vpos, o_flags;
+
+   /* size of one block relative to the buffer, adjusted by the macroblock size */
+   struct vertex2f scale = {
+      (float)BLOCK_WIDTH / r->buffer_width * MACROBLOCK_WIDTH / r->macroblock_size,
+      (float)BLOCK_HEIGHT / r->buffer_height * MACROBLOCK_HEIGHT / r->macroblock_size
+   };
+
+   unsigned label;
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+   t_vpos = calc_position(r, shader, ureg_imm2f(shader, scale.x, scale.y));
+   t_vtex = ureg_DECL_temporary(shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+   o_flags = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS);
+
+   /*
+    * o_vtex is left to vs_callback, which receives t_vpos
+    * o_flags.z = vpos.z * 0.5      // intra
+    * o_flags.w = -1
+    *
+    * if(vpos.w) {                  // interlaced
+    *    t_vtex.xy = vrect.y ? { 0, scale.y } : { -scale.y, 0 }
+    *    t_vtex.z = fraction(vpos.y / 2)
+    *    t_vtex.y = t_vtex.z ? t_vtex.x : t_vtex.y
+    *    o_vpos.y = t_vtex.y + t_vpos.y
+    *
+    *    o_flags.w = t_vtex.z ? 0 : 1
+    * }
+    *
+    */
+
+   vs_callback(callback_priv, r, shader, VS_O_VTEX, t_vpos);
+
+   ureg_MUL(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_Z),
+            ureg_scalar(vpos, TGSI_SWIZZLE_Z), ureg_imm1f(shader, 0.5f));
+   ureg_MOV(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f));
+
+   if (r->macroblock_size == MACROBLOCK_HEIGHT) { //TODO
+      ureg_IF(shader, ureg_scalar(vpos, TGSI_SWIZZLE_W), &label);
+
+         ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY),
+                  ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_Y)),
+                  ureg_imm2f(shader, 0.0f, scale.y),
+                  ureg_imm2f(shader, -scale.y, 0.0f));
+         ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z),
+                  ureg_scalar(vpos, TGSI_SWIZZLE_Y), ureg_imm1f(shader, 0.5f));
+
+         ureg_FRC(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z), ureg_src(t_vtex));
+
+         ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y),
+                  ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)),
+                  ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_X),
+                  ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Y));
+         ureg_ADD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_Y),
+                  ureg_src(t_vpos), ureg_src(t_vtex));
+
+         ureg_CMP(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W),
+                  ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)),
+                  ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 1.0f));
+
+      ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+      ureg_ENDIF(shader);
+   }
+
+   ureg_release_temporary(shader, t_vtex);
+   ureg_release_temporary(shader, t_vpos);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, r->pipe);
+}
+
+/*
+ * Build the fragment shader used for ycbcr blocks.
+ *
+ * r:             supplies the pipe context and is passed on to fs_callback
+ * scale:         factor applied to the value produced by fs_callback;
+ *                a plain add is emitted instead when it is exactly 1.0f
+ * invert:        when true the result is multiplied by -1
+ * fs_callback:   invoked with (callback_priv, r, shader, VS_O_VTEX, tmp)
+ *                so the caller can emit its own part of the shader
+ * callback_priv: passed through to fs_callback unchanged
+ *
+ * Returns the shader object, or NULL if the ureg program could not be
+ * created.
+ */
+static void *
+create_ycbcr_frag_shader(struct vl_mc *r, float scale, bool invert,
+                         vl_mc_ycbcr_frag_shader fs_callback, void *callback_priv)
+{
+   struct ureg_program *shader;
+   struct ureg_src flags;
+   struct ureg_dst tmp;
+   struct ureg_dst fragment;
+   unsigned label;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   flags = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS, TGSI_INTERPOLATE_LINEAR);
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   tmp = calc_line(shader);
+
+   /*
+    * if (line == flags.w)
+    *    kill();
+    * else {
+    *    tmp = value produced by fs_callback
+    *    fragment.xyz = (tmp * scale + flags.z) * (invert ? -1 : 1)
+    *    fragment.w = 1.0f
+    * }
+    */
+
+   ureg_SEQ(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y),
+            ureg_scalar(flags, TGSI_SWIZZLE_W), ureg_src(tmp));
+
+   ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
+
+      ureg_KILP(shader);
+
+   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+   ureg_ELSE(shader, &label);
+
+      fs_callback(callback_priv, r, shader, VS_O_VTEX, tmp);
+
+      if (scale != 1.0f)
+         ureg_MAD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ),
+                  ureg_src(tmp), ureg_imm1f(shader, scale),
+                  ureg_scalar(flags, TGSI_SWIZZLE_Z));
+      else
+         ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ),
+                  ureg_src(tmp), ureg_scalar(flags, TGSI_SWIZZLE_Z));
+                  
+      ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_imm1f(shader, invert ? -1.0f : 1.0f));
+      ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
+
+   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+   ureg_ENDIF(shader);
+
+   ureg_release_temporary(shader, tmp);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, r->pipe);
+}
+
+/* Create the sampler, blend and rasterizer state objects for r; returns false after freeing any partial state. */
+static bool
+init_pipe_state(struct vl_mc *r)
+{
+   struct pipe_sampler_state sampler;
+   struct pipe_blend_state blend;
+   struct pipe_rasterizer_state rs_state;
+   unsigned i;
+
+   assert(r);
+
+   memset(&sampler, 0, sizeof(sampler));
+   sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+   sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
+   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+   sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
+   sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+   sampler.compare_func = PIPE_FUNC_ALWAYS;
+   sampler.normalized_coords = 1;
+   r->sampler_ref = r->pipe->create_sampler_state(r->pipe, &sampler);
+   if (!r->sampler_ref)
+      goto error_sampler_ref;
+
+   for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) { /* one clear/add/sub triple per colour write mask value */
+      memset(&blend, 0, sizeof blend);
+      blend.independent_blend_enable = 0;
+      blend.rt[0].blend_enable = 1;
+      blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+      blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
+      blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; /* "clear": destination is overwritten */
+      blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+      blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
+      blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
+      blend.logicop_enable = 0;
+      blend.logicop_func = PIPE_LOGICOP_CLEAR;
+      blend.rt[0].colormask = i;
+      blend.dither = 0;
+      r->blend_clear[i] = r->pipe->create_blend_state(r->pipe, &blend);
+      if (!r->blend_clear[i])
+         goto error_blend;
+
+      blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; /* "add": accumulate onto the destination */
+      blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+      r->blend_add[i] = r->pipe->create_blend_state(r->pipe, &blend);
+      if (!r->blend_add[i])
+         goto error_blend;
+
+      blend.rt[0].rgb_func = PIPE_BLEND_REVERSE_SUBTRACT; /* "sub": dst - src, factors unchanged */
+      blend.rt[0].alpha_func = PIPE_BLEND_REVERSE_SUBTRACT; /* was written to alpha_dst_factor, which takes a BLENDFACTOR */
+      r->blend_sub[i] = r->pipe->create_blend_state(r->pipe, &blend);
+      if (!r->blend_sub[i])
+         goto error_blend;
+   }
+
+   memset(&rs_state, 0, sizeof(rs_state));
+   rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
+   rs_state.point_quad_rasterization = true;
+   rs_state.point_size = BLOCK_WIDTH;
+   rs_state.gl_rasterization_rules = true;
+   r->rs_state = r->pipe->create_rasterizer_state(r->pipe, &rs_state);
+   if (!r->rs_state)
+      goto error_rs_state;
+
+   return true;
+
+error_rs_state:
+error_blend:
+   for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) { /* slots never reached are NULL: vl_mc_init() zeroes *r first */
+      if (r->blend_sub[i])
+         r->pipe->delete_blend_state(r->pipe, r->blend_sub[i]);
+
+      if (r->blend_add[i])
+         r->pipe->delete_blend_state(r->pipe, r->blend_add[i]);
+
+      if (r->blend_clear[i])
+         r->pipe->delete_blend_state(r->pipe, r->blend_clear[i]);
+   }
+
+   r->pipe->delete_sampler_state(r->pipe, r->sampler_ref);
+
+error_sampler_ref:
+   return false;
+}
+/* Destroy every state object created by init_pipe_state(). */
+static void
+cleanup_pipe_state(struct vl_mc *r)
+{
+   unsigned idx = 0;
+
+   assert(r);
+   r->pipe->delete_sampler_state(r->pipe, r->sampler_ref);
+   while (idx < VL_MC_NUM_BLENDERS) {
+      r->pipe->delete_blend_state(r->pipe, r->blend_clear[idx]);
+      r->pipe->delete_blend_state(r->pipe, r->blend_add[idx]);
+      r->pipe->delete_blend_state(r->pipe, r->blend_sub[idx]);
+      ++idx;
+   }
+   r->pipe->delete_rasterizer_state(r->pipe, r->rs_state);
+}
+/* Zero *renderer, then create its pipe state and five shaders on pipe; on failure everything created so far is deleted and false is returned. */
+bool
+vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe,
+           unsigned buffer_width, unsigned buffer_height,
+           unsigned macroblock_size, float scale,
+           vl_mc_ycbcr_vert_shader vs_callback,
+           vl_mc_ycbcr_frag_shader fs_callback,
+           void *callback_priv)
+{
+   assert(renderer);
+   assert(pipe);
+
+   memset(renderer, 0, sizeof(struct vl_mc)); /* init_pipe_state()'s error path relies on NULL blend slots */
+
+   renderer->pipe = pipe;
+   renderer->buffer_width = buffer_width;
+   renderer->buffer_height = buffer_height;
+   renderer->macroblock_size = macroblock_size;
+
+   if (!init_pipe_state(renderer))
+      goto error_pipe_state;
+
+   renderer->vs_ref = create_ref_vert_shader(renderer);
+   if (!renderer->vs_ref)
+      goto error_vs_ref;
+
+   renderer->vs_ycbcr = create_ycbcr_vert_shader(renderer, vs_callback, callback_priv);
+   if (!renderer->vs_ycbcr)
+      goto error_vs_ycbcr;
+
+   renderer->fs_ref = create_ref_frag_shader(renderer);
+   if (!renderer->fs_ref)
+      goto error_fs_ref;
+
+   renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale, false, fs_callback, callback_priv);
+   if (!renderer->fs_ycbcr)
+      goto error_fs_ycbcr;
+
+   renderer->fs_ycbcr_sub = create_ycbcr_frag_shader(renderer, scale, true, fs_callback, callback_priv); /* NOTE(review): 'true' is presumably the shader's invert flag -- signature not visible here */
+   if (!renderer->fs_ycbcr_sub)
+      goto error_fs_ycbcr_sub;
+
+   return true;
+   /* Each label below deletes the object created just before the step that failed, then falls through. */
+error_fs_ycbcr_sub:
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr);
+
+error_fs_ycbcr:
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
+
+error_fs_ref:
+   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ycbcr);
+
+error_vs_ycbcr:
+   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ref);
+
+error_vs_ref:
+   cleanup_pipe_state(renderer);
+
+error_pipe_state:
+   return false;
+}
+/* Release everything vl_mc_init() created for renderer: the pipe state objects, then the five shaders. */
+void
+vl_mc_cleanup(struct vl_mc *renderer)
+{
+   assert(renderer);
+
+   cleanup_pipe_state(renderer);
+
+   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ref);
+   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ycbcr);
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr);
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr_sub);
+}
+/* Attach buffer to renderer and preset the viewport/framebuffer fields that do not depend on the surface. */
+bool
+vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer)
+{
+   unsigned axis;
+
+   assert(renderer && buffer);
+
+   buffer->renderer = renderer;
+
+   buffer->viewport.scale[2] = 1.0f;
+   buffer->viewport.scale[3] = 1.0f;
+   for (axis = 0; axis < 4; ++axis)
+      buffer->viewport.translate[axis] = 0.0f;
+
+   buffer->fb_state.zsbuf = NULL;
+   buffer->fb_state.nr_cbufs = 1;
+
+   return true;
+}
+/* Counterpart of vl_mc_init_buffer(); it only validates its argument, there is nothing to release. */
+void
+vl_mc_cleanup_buffer(struct vl_mc_buffer *buffer)
+{
+   assert(buffer);
+}
+/* Point buffer at a new render target and size its viewport and framebuffer state to match. */
+void
+vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface)
+{
+   assert(buffer && surface);
+
+   buffer->fb_state.cbufs[0] = surface;
+   buffer->fb_state.width = surface->width;
+   buffer->fb_state.height = surface->height;
+
+   buffer->viewport.scale[0] = surface->width;
+   buffer->viewport.scale[1] = surface->height;
+
+   buffer->surface_cleared = false; /* nothing drawn yet: the next pass binds a blend_clear state */
+}
+/* Bind the rasterizer, blend, framebuffer and viewport state shared by every MC draw on buffer. */
+static void
+prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned mask)
+{
+   struct vl_mc *mc;
+   struct pipe_context *ctx;
+   void *blend;
+
+   assert(buffer);
+   mc = buffer->renderer;
+   ctx = mc->pipe;
+
+   /* a surface that already holds a reference picture is accumulated onto, otherwise it is overwritten */
+   blend = buffer->surface_cleared ? mc->blend_add[mask] : mc->blend_clear[mask];
+   ctx->bind_rasterizer_state(ctx, mc->rs_state);
+   ctx->bind_blend_state(ctx, blend);
+   ctx->set_framebuffer_state(ctx, &buffer->fb_state);
+   ctx->set_viewport_state(ctx, &buffer->viewport);
+}
+/* Draw the reference picture `ref` into the R, G and B channels of buffer's surface, then mark the surface as written. */
+void
+vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref)
+{
+   struct vl_mc *mc;
+   struct pipe_context *ctx;
+   unsigned num_blocks;
+
+   assert(buffer && ref);
+
+   prepare_pipe_4_rendering(buffer, PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B);
+   mc = buffer->renderer;
+   ctx = mc->pipe;
+
+   ctx->bind_vs_state(ctx, mc->vs_ref);
+   ctx->bind_fs_state(ctx, mc->fs_ref);
+   ctx->set_fragment_sampler_views(ctx, 1, &ref);
+   ctx->bind_fragment_sampler_states(ctx, 1, &mc->sampler_ref);
+
+   /* NOTE(review): evaluates as ((w / MB_W) * h) / MB_H; equals columns * rows when buffer_height is a multiple of MACROBLOCK_HEIGHT */
+   num_blocks = mc->buffer_width / MACROBLOCK_WIDTH * mc->buffer_height / MACROBLOCK_HEIGHT;
+   util_draw_arrays_instanced(ctx, PIPE_PRIM_QUADS, 0, 4, 0, num_blocks);
+   buffer->surface_cleared = true;
+}
+/* Draw num_instances quads for one plane; if a reference was rendered first, repeat with fs_ycbcr_sub and blend_sub. */
+void
+vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num_instances)
+{
+   const unsigned plane_mask = 1 << component; /* blend state arrays are indexed by colour write mask */
+   struct vl_mc *mc;
+   struct pipe_context *ctx;
+
+   assert(buffer);
+   if (!num_instances)
+      return;
+
+   prepare_pipe_4_rendering(buffer, plane_mask);
+   mc = buffer->renderer;
+   ctx = mc->pipe;
+   ctx->bind_vs_state(ctx, mc->vs_ycbcr);
+   ctx->bind_fs_state(ctx, mc->fs_ycbcr);
+   util_draw_arrays_instanced(ctx, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
+
+   if (!buffer->surface_cleared)
+      return;
+
+   /* second pass over the same instances with the reverse-subtract blend state */
+   ctx->bind_blend_state(ctx, mc->blend_sub[plane_mask]);
+   ctx->bind_fs_state(ctx, mc->fs_ycbcr_sub);
+   util_draw_arrays_instanced(ctx, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
+}
diff --git a/src/gallium/auxiliary/vl/vl_mc.h b/src/gallium/auxiliary/vl/vl_mc.h
new file mode 100644
index 00000000000..9fabf02a3ac
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mc.h
@@ -0,0 +1,99 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_mc_h
+#define vl_mc_h
+
+#include <pipe/p_state.h>
+#include <pipe/p_video_state.h>
+
+#include <tgsi/tgsi_ureg.h>
+
+#include "vl_defines.h"
+#include "vl_types.h"
+
+#define VL_MC_NUM_BLENDERS (1 << VL_MAX_PLANES)
+
+struct pipe_context;
+/* Motion compensation renderer: the context plus every state object and shader vl_mc_init() creates. */
+struct vl_mc
+{
+   struct pipe_context *pipe; /* context the objects below are created on and bound to */
+   unsigned buffer_width; /* copied from the vl_mc_init() arguments */
+   unsigned buffer_height;
+   unsigned macroblock_size;
+
+   void *rs_state; /* rasterizer state object */
+
+   void *blend_clear[VL_MC_NUM_BLENDERS]; /* blend states, indexed by colour write mask */
+   void *blend_add[VL_MC_NUM_BLENDERS];
+   void *blend_sub[VL_MC_NUM_BLENDERS];
+   void *vs_ref, *vs_ycbcr; /* vertex shaders for the reference and ycbcr passes */
+   void *fs_ref, *fs_ycbcr, *fs_ycbcr_sub; /* matching fragment shaders */
+   void *sampler_ref; /* sampler state bound while drawing the reference picture */
+};
+/* Per-target state: the renderer it belongs to plus the viewport/framebuffer describing the bound surface. */
+struct vl_mc_buffer
+{
+   struct vl_mc *renderer; /* set by vl_mc_init_buffer() */
+
+   bool surface_cleared; /* false after vl_mc_set_surface(), true once vl_mc_render_ref() has drawn */
+
+   struct pipe_viewport_state viewport;
+   struct pipe_framebuffer_state fb_state;
+};
+
+typedef void (*vl_mc_ycbcr_vert_shader)(void *priv, struct vl_mc *mc,
+                                        struct ureg_program *shader,
+                                        unsigned first_output,
+                                        struct ureg_dst tex);
+/* Hook run while the ycbcr fragment shader is built; priv is the callback_priv given to vl_mc_init(), dst a temporary the caller reads afterwards. */
+typedef void (*vl_mc_ycbcr_frag_shader)(void *priv, struct vl_mc *mc,
+                                        struct ureg_program *shader,
+                                        unsigned first_input,
+                                        struct ureg_dst dst);
+/* Initialize *renderer on pipe; returns false (with nothing left allocated) on failure. */
+bool vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe,
+                unsigned buffer_width, unsigned buffer_height,
+                unsigned macroblock_size, float scale,
+                vl_mc_ycbcr_vert_shader vs_callback,
+                vl_mc_ycbcr_frag_shader fs_callback,
+                void *callback_priv);
+
+void vl_mc_cleanup(struct vl_mc *renderer);
+
+bool vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer);
+
+void vl_mc_cleanup_buffer(struct vl_mc_buffer *buffer);
+
+void vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface);
+
+void vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref);
+
+void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num_instances);
+
+#endif /* vl_mc_h */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
new file mode 100644
index 00000000000..7a14efb627e
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -0,0 +1,1836 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * This file is based upon slice_xvmc.c and vlc.h from the xine project,
+ * which in turn is based on mpeg2dec. The following is the original copyright:
+ *
+ * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <stdint.h>
+
+#include <pipe/p_video_state.h>
+
+#include "vl_vlc.h"
+#include "vl_mpeg12_bitstream.h"
+
+/* take num bits from the high part of bit_buf and zero extend them */
+#define UBITS(buf,num) (((uint32_t)(buf)) >> (32 - (num)))
+
+/* take num bits from the high part of bit_buf and sign extend them */
+#define SBITS(buf,num) (((int32_t)(buf)) >> (32 - (num)))
+
+/* macroblock modes */
+#define MACROBLOCK_INTRA 1
+#define MACROBLOCK_PATTERN 2
+#define MACROBLOCK_MOTION_BACKWARD 4
+#define MACROBLOCK_MOTION_FORWARD 8
+#define MACROBLOCK_QUANT 16
+
+/* motion_type */
+#define MOTION_TYPE_MASK (3*64)
+#define MOTION_TYPE_BASE 64
+#define MC_FIELD (1*64)
+#define MC_FRAME (2*64)
+#define MC_16X8 (2*64)
+#define MC_DMV (3*64)
+
+/* picture structure */
+#define TOP_FIELD     1
+#define BOTTOM_FIELD  2
+#define FRAME_PICTURE 3
+
+/* picture coding type (mpeg2 header) */
+#define I_TYPE 1
+#define P_TYPE 2
+#define B_TYPE 3
+#define D_TYPE 4
+
+typedef struct {
+   uint8_t modes;
+   uint8_t len;
+} MBtab;
+
+typedef struct {
+   uint8_t delta;
+   uint8_t len;
+} MVtab;
+
+typedef struct {
+   int8_t dmv;
+   uint8_t len;
+} DMVtab;
+
+typedef struct {
+   uint8_t cbp;
+   uint8_t len;
+} CBPtab;
+
+typedef struct {
+   uint8_t size;
+   uint8_t len;
+} DCtab;
+
+typedef struct {
+   uint8_t run;
+   uint8_t level;
+   uint8_t len;
+} DCTtab;
+
+typedef struct {
+   uint8_t mba;
+   uint8_t len;
+} MBAtab;
+/* Short aliases used only inside the MB_* tables that follow; they are #undef'd right after them. */
+#define INTRA MACROBLOCK_INTRA
+#define QUANT MACROBLOCK_QUANT
+#define MC MACROBLOCK_MOTION_FORWARD
+#define CODED MACROBLOCK_PATTERN
+#define FWD MACROBLOCK_MOTION_FORWARD
+#define BWD MACROBLOCK_MOTION_BACKWARD
+#define INTER (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD)
+/* I pictures: {mode flags, bits to consume}, indexed by the next 1 bit of the stream. */
+static const MBtab MB_I [] = {
+   {INTRA|QUANT, 2}, {INTRA, 1}
+};
+/* P pictures: {mode flags, bits to consume}, indexed by the next 5 bits of the stream. */
+static const MBtab MB_P [] = {
+   {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA,    5},
+   {MC,          3}, {MC,          3}, {MC,             3}, {MC,       3},
+   {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
+   {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
+   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
+   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
+   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
+   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1}
+};
+/* B pictures: {mode flags, bits to consume}, indexed by the next 6 bits of the stream. */
+static const MBtab MB_B [] = {
+   {0,                 0}, {INTRA|QUANT,       6},
+   {BWD|CODED|QUANT,   6}, {FWD|CODED|QUANT,   6},
+   {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5},
+                                     {INTRA,       5}, {INTRA,       5},
+   {FWD,         4}, {FWD,         4}, {FWD,         4}, {FWD,         4},
+   {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4},
+   {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
+   {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
+   {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
+   {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
+   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}
+};
+
+#undef INTRA
+#undef QUANT
+#undef MC
+#undef CODED
+#undef FWD
+#undef BWD
+#undef INTER
+
+static const MVtab MV_4 [] = {
+   { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}
+};
+
+static const MVtab MV_10 [] = {
+   { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10},
+   { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10},
+   {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9},
+   { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7},
+   { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7},
+   { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}
+};
+
+static const DMVtab DMV_2 [] = {
+   { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2}
+};
+
+static const CBPtab CBP_7 [] = {
+   {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7},
+   {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7},
+   {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6},
+   {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6},
+   {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5},
+   {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5},
+   {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5},
+   {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5},
+   {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5},
+   {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5},
+   {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5},
+   {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5},
+   {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5},
+   {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5},
+   {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5},
+   {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5},
+   {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4},
+   {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4},
+   {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4},
+   {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4},
+   {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
+   {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
+   {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
+   {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
+   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
+   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
+   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
+   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}
+};
+
+static const CBPtab CBP_9 [] = {
+   {0,    0}, {0x00, 9}, {0x27, 9}, {0x1b, 9},
+   {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9},
+   {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8},
+   {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8},
+   {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8},
+   {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8},
+   {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8},
+   {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8},
+   {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8},
+   {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8},
+   {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8},
+   {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8},
+   {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8},
+   {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8},
+   {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8},
+   {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8}
+};
+
+static const DCtab DC_lum_5 [] = {
+   {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+   {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+   {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
+   {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}
+};
+
+static const DCtab DC_chrom_5 [] = {
+   {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
+   {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+   {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+   {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}
+};
+
+static const DCtab DC_long [] = {
+   {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
+   {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
+   {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6},
+   {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9}
+};
+
+static const DCTtab DCT_16 [] = {
+   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+   {  2,18, 0}, {  2,17, 0}, {  2,16, 0}, {  2,15, 0},
+   {  7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0},
+   { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0},
+   { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0}
+};
+
+static const DCTtab DCT_15 [] = {
+   {  1,40,15}, {  1,39,15}, {  1,38,15}, {  1,37,15},
+   {  1,36,15}, {  1,35,15}, {  1,34,15}, {  1,33,15},
+   {  1,32,15}, {  2,14,15}, {  2,13,15}, {  2,12,15},
+   {  2,11,15}, {  2,10,15}, {  2, 9,15}, {  2, 8,15},
+   {  1,31,14}, {  1,31,14}, {  1,30,14}, {  1,30,14},
+   {  1,29,14}, {  1,29,14}, {  1,28,14}, {  1,28,14},
+   {  1,27,14}, {  1,27,14}, {  1,26,14}, {  1,26,14},
+   {  1,25,14}, {  1,25,14}, {  1,24,14}, {  1,24,14},
+   {  1,23,14}, {  1,23,14}, {  1,22,14}, {  1,22,14},
+   {  1,21,14}, {  1,21,14}, {  1,20,14}, {  1,20,14},
+   {  1,19,14}, {  1,19,14}, {  1,18,14}, {  1,18,14},
+   {  1,17,14}, {  1,17,14}, {  1,16,14}, {  1,16,14}
+};
+
+static const DCTtab DCT_13 [] = {
+   { 11, 2,13}, { 10, 2,13}, {  6, 3,13}, {  4, 4,13},
+   {  3, 5,13}, {  2, 7,13}, {  2, 6,13}, {  1,15,13},
+   {  1,14,13}, {  1,13,13}, {  1,12,13}, { 27, 1,13},
+   { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13},
+   {  1,11,12}, {  1,11,12}, {  9, 2,12}, {  9, 2,12},
+   {  5, 3,12}, {  5, 3,12}, {  1,10,12}, {  1,10,12},
+   {  3, 4,12}, {  3, 4,12}, {  8, 2,12}, {  8, 2,12},
+   { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12},
+   {  1, 9,12}, {  1, 9,12}, { 20, 1,12}, { 20, 1,12},
+   { 19, 1,12}, { 19, 1,12}, {  2, 5,12}, {  2, 5,12},
+   {  4, 3,12}, {  4, 3,12}, {  1, 8,12}, {  1, 8,12},
+   {  7, 2,12}, {  7, 2,12}, { 18, 1,12}, { 18, 1,12}
+};
+
+static const DCTtab DCT_B14_10 [] = {
+   { 17, 1,10}, {  6, 2,10}, {  1, 7,10}, {  3, 3,10},
+   {  2, 4,10}, { 16, 1,10}, { 15, 1,10}, {  5, 2,10}
+};
+
+static const DCTtab DCT_B14_8 [] = {
+   { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6},
+   {  3, 2, 7}, {  3, 2, 7}, { 10, 1, 7}, { 10, 1, 7},
+   {  1, 4, 7}, {  1, 4, 7}, {  9, 1, 7}, {  9, 1, 7},
+   {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6},
+   {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6},
+   {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6},
+   {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
+   { 14, 1, 8}, {  1, 6, 8}, { 13, 1, 8}, { 12, 1, 8},
+   {  4, 2, 8}, {  2, 3, 8}, {  1, 5, 8}, { 11, 1, 8}
+};
+
+static const DCTtab DCT_B14AC_5 [] = {
+                {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
+   {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
+   {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}
+};
+
+static const DCTtab DCT_B14DC_5 [] = {
+                {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
+   {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}
+};
+
+static const DCTtab DCT_B15_10 [] = {
+   {  6, 2, 9}, {  6, 2, 9}, { 15, 1, 9}, { 15, 1, 9},
+   {  3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9}
+};
+
+static const DCTtab DCT_B15_8 [] = {
+   { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6},
+   {  8, 1, 7}, {  8, 1, 7}, {  9, 1, 7}, {  9, 1, 7},
+   {  7, 1, 7}, {  7, 1, 7}, {  3, 2, 7}, {  3, 2, 7},
+   {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6},
+   {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6},
+   {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6},
+   {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
+   {  2, 5, 8}, { 12, 1, 8}, {  1,11, 8}, {  1,10, 8},
+   { 14, 1, 8}, { 13, 1, 8}, {  4, 2, 8}, {  2, 4, 8},
+   {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
+   {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
+   {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
+   {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
+   {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
+   {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
+   {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
+   {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
+   {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
+   { 10, 1, 7}, { 10, 1, 7}, {  2, 3, 7}, {  2, 3, 7},
+   { 11, 1, 7}, { 11, 1, 7}, {  1, 8, 7}, {  1, 8, 7},
+   {  1, 9, 7}, {  1, 9, 7}, {  1,12, 8}, {  1,13, 8},
+   {  3, 3, 8}, {  5, 2, 8}, {  1,14, 8}, {  1,15, 8}
+};
+
+static const MBAtab MBA_5 [] = {
+                   {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4},
+   {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3},
+   {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1},
+   {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}
+};
+
+static const MBAtab MBA_11 [] = {
+   {32, 11}, {31, 11}, {30, 11}, {29, 11},
+   {28, 11}, {27, 11}, {26, 11}, {25, 11},
+   {24, 11}, {23, 11}, {22, 11}, {21, 11},
+   {20, 10}, {20, 10}, {19, 10}, {19, 10},
+   {18, 10}, {18, 10}, {17, 10}, {17, 10},
+   {16, 10}, {16, 10}, {15, 10}, {15, 10},
+   {14,  8}, {14,  8}, {14,  8}, {14,  8},
+   {14,  8}, {14,  8}, {14,  8}, {14,  8},
+   {13,  8}, {13,  8}, {13,  8}, {13,  8},
+   {13,  8}, {13,  8}, {13,  8}, {13,  8},
+   {12,  8}, {12,  8}, {12,  8}, {12,  8},
+   {12,  8}, {12,  8}, {12,  8}, {12,  8},
+   {11,  8}, {11,  8}, {11,  8}, {11,  8},
+   {11,  8}, {11,  8}, {11,  8}, {11,  8},
+   {10,  8}, {10,  8}, {10,  8}, {10,  8},
+   {10,  8}, {10,  8}, {10,  8}, {10,  8},
+   { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
+   { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
+   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7}
+};
+
+static const int non_linear_quantizer_scale[] = {
+   0,  1,  2,  3,  4,  5,   6,   7,
+   8, 10, 12, 14, 16, 18,  20,  22,
+   24, 28, 32, 36, 40, 44,  48,  52,
+   56, 64, 72, 80, 88, 96, 104, 112
+};
+
+/**
+ * Read the mode flags of the next macroblock from the bitstream.
+ *
+ * The flags are looked up in MB_I, MB_P or MB_B (chosen by
+ * picture->picture_coding_type) using the next 1, 5 or 6 bits; the
+ * matching table entry also tells how many bits to consume.
+ *
+ * For P and B pictures a motion type is merged into the result: MC_FRAME
+ * is used for frame pictures with frame_pred_frame_dct set, otherwise it
+ * is read as two extra bits scaled by MOTION_TYPE_BASE.  P pictures only
+ * do this for macroblocks with MACROBLOCK_MOTION_FORWARD, B pictures skip
+ * the two-bit read for intra macroblocks.
+ *
+ * Returns MACROBLOCK_* flags, possibly combined with a motion type;
+ * MACROBLOCK_INTRA for D pictures and 0 for an unknown coding type.
+ */
+static inline int
+get_macroblock_modes(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture)
+{
+   const MBtab *entry;
+   int modes;
+
+   switch (picture->picture_coding_type) {
+   case I_TYPE:
+      entry = &MB_I[vl_vlc_ubits(&bs->vlc, 1)];
+      vl_vlc_dumpbits(&bs->vlc, entry->len);
+      return entry->modes;
+
+   case P_TYPE:
+      entry = &MB_P[vl_vlc_ubits(&bs->vlc, 5)];
+      vl_vlc_dumpbits(&bs->vlc, entry->len);
+      modes = entry->modes;
+
+      /* without forward motion there is no motion type to add */
+      if (!(modes & MACROBLOCK_MOTION_FORWARD))
+         return modes;
+
+      if (picture->picture_structure == FRAME_PICTURE && picture->frame_pred_frame_dct) {
+         modes |= MC_FRAME;
+      } else {
+         modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
+         vl_vlc_dumpbits(&bs->vlc, 2);
+      }
+      return modes;
+
+   case B_TYPE:
+      entry = &MB_B[vl_vlc_ubits(&bs->vlc, 6)];
+      vl_vlc_dumpbits(&bs->vlc, entry->len);
+      modes = entry->modes;
+
+      if (picture->picture_structure == FRAME_PICTURE && picture->frame_pred_frame_dct) {
+         modes |= MC_FRAME;
+      } else if (!(modes & MACROBLOCK_INTRA)) {
+         modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
+         vl_vlc_dumpbits(&bs->vlc, 2);
+      }
+      return modes;
+
+   case D_TYPE:
+      vl_vlc_dumpbits(&bs->vlc, 1);
+      return MACROBLOCK_INTRA;
+
+   default:
+      return 0;
+   }
+}
+
+/*
+ * Determine the DCT type of the current macroblock.
+ *
+ * A one bit flag is only present in the bitstream for frame pictures that do
+ * not force frame DCT (frame_pred_frame_dct clear) and only for macroblocks
+ * that are intra or have a coded block pattern. In every other case nothing
+ * is read and frame DCT is returned.
+ */
+static inline enum pipe_mpeg12_dct_type
+get_dct_type(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int macroblock_modes)
+{
+   enum pipe_mpeg12_dct_type result;
+
+   if (picture->picture_structure != FRAME_PICTURE)
+      return PIPE_MPEG12_DCT_TYPE_FRAME;
+
+   if (picture->frame_pred_frame_dct)
+      return PIPE_MPEG12_DCT_TYPE_FRAME;
+
+   if (!(macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)))
+      return PIPE_MPEG12_DCT_TYPE_FRAME;
+
+   if (vl_vlc_ubits(&bs->vlc, 1))
+      result = PIPE_MPEG12_DCT_TYPE_FIELD;
+   else
+      result = PIPE_MPEG12_DCT_TYPE_FRAME;
+   vl_vlc_dumpbits(&bs->vlc, 1);
+
+   return result;
+}
+
+/*
+ * Read the 5-bit quantizer scale code and convert it to a quantizer scale:
+ * via the non-linear table when picture->q_scale_type is set, otherwise by
+ * doubling the code.
+ */
+static inline int
+get_quantizer_scale(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture)
+{
+   const int code = vl_vlc_ubits(&bs->vlc, 5);
+
+   vl_vlc_dumpbits(&bs->vlc, 5);
+
+   return picture->q_scale_type ? non_linear_quantizer_scale[code] : code << 1;
+}
+
+/*
+ * Decode one motion vector delta from the bitstream.
+ *
+ * The delta is built from a table code (MV_4 or MV_10, chosen by the
+ * magnitude of the bit buffer), a sign bit and f_code residual bits.
+ * Returns 0 when the top bit of the buffer is set, otherwise the signed
+ * delta.
+ */
+static inline int
+get_motion_delta(struct vl_mpg12_bs *bs, unsigned f_code)
+{
+   int delta;
+   int sign;
+   const MVtab * tab;
+
+   if (bs->vlc.buf & 0x80000000) {
+      /* top bit set: consume that single bit, the delta is zero */
+      vl_vlc_dumpbits(&bs->vlc, 1);
+      return 0;
+   } else if (bs->vlc.buf >= 0x0c000000) {
+
+      tab = MV_4 + vl_vlc_ubits(&bs->vlc, 4);
+      delta = (tab->delta << f_code) + 1;
+      /*
+       * The bit counter is advanced once for the whole code (table code,
+       * sign bit and residual); the buffer itself is shifted step by step
+       * below as each part is read.
+       */
+      bs->vlc.bits += tab->len + f_code + 1;
+      bs->vlc.buf <<= tab->len;
+
+      sign = vl_vlc_sbits(&bs->vlc, 1);
+      bs->vlc.buf <<= 1;
+
+      if (f_code)
+         delta += vl_vlc_ubits(&bs->vlc, f_code);
+      bs->vlc.buf <<= f_code;
+
+      /* negates delta when sign is -1 (presumably sbits(1) yields 0 or -1) */
+      return (delta ^ sign) - sign;
+
+   } else {
+
+      tab = MV_10 + vl_vlc_ubits(&bs->vlc, 10);
+      delta = (tab->delta << f_code) + 1;
+      /* here only the table code and the sign bit are accounted for up front */
+      bs->vlc.bits += tab->len + 1;
+      bs->vlc.buf <<= tab->len;
+
+      sign = vl_vlc_sbits(&bs->vlc, 1);
+      bs->vlc.buf <<= 1;
+
+      if (f_code) {
+         /* the residual is read separately, after a needbits call */
+         vl_vlc_needbits(&bs->vlc);
+         delta += vl_vlc_ubits(&bs->vlc, f_code);
+         vl_vlc_dumpbits(&bs->vlc, f_code);
+      }
+
+      /* negates delta when sign is -1 (presumably sbits(1) yields 0 or -1) */
+      return (delta ^ sign) - sign;
+   }
+}
+
+/*
+ * Wrap a motion vector component into the range [-(16 << f_code),
+ * (16 << f_code)). Values already inside the range are returned unchanged;
+ * values outside are moved by one full range width (2 * (16 << f_code))
+ * towards zero.
+ */
+static inline int
+bound_motion_vector(int vec, unsigned f_code)
+{
+#if 1
+   const unsigned int range = 16 << f_code;
+   int neg_mask;
+
+   /* a single unsigned compare covers both the lower and the upper bound */
+   if ((unsigned int)(vec + range) < 2 * range)
+      return vec;
+
+   /* all ones for negative vectors, zero otherwise */
+   neg_mask = ((int32_t)vec) >> 31;
+
+   return vec - ((2 * range) ^ neg_mask) + neg_mask;
+#else
+   return ((int32_t)vec << (28 - f_code)) >> (28 - f_code);
+#endif
+}
+
+/*
+ * Decode a differential motion vector value: look up the next two bits in
+ * DMV_2, consume the code length stored in the entry and return its dmv.
+ */
+static inline int
+get_dmv(struct vl_mpg12_bs *bs)
+{
+   const DMVtab *entry = &DMV_2[vl_vlc_ubits(&bs->vlc, 2)];
+
+   vl_vlc_dumpbits(&bs->vlc, entry->len);
+
+   return entry->dmv;
+}
+
+/*
+ * Decode the coded block pattern of a macroblock.
+ *
+ * Buffers at or above 0x20000000 are resolved through the 7-bit table
+ * CBP_7 (index offset by 16), everything else through the 9-bit table
+ * CBP_9. The code length stored in the entry is consumed afterwards.
+ */
+static inline int
+get_coded_block_pattern(struct vl_mpg12_bs *bs)
+{
+   const CBPtab *entry;
+
+   vl_vlc_needbits(&bs->vlc);
+
+   if (bs->vlc.buf >= 0x20000000)
+      entry = &CBP_7[vl_vlc_ubits(&bs->vlc, 7) - 16];
+   else
+      entry = &CBP_9[vl_vlc_ubits(&bs->vlc, 9)];
+
+   vl_vlc_dumpbits(&bs->vlc, entry->len);
+
+   return entry->cbp;
+}
+
+/*
+ * Decode the DC coefficient differential of a luma block.
+ *
+ * The size code is looked up in DC_lum_5 (5 bits) or, for buffers at or
+ * above 0xf8000000, in DC_long (9 bits, index offset by 0x1e0). A size of
+ * zero means a differential of 0; otherwise "size" additional bits hold
+ * the differential.
+ */
+static inline int
+get_luma_dc_dct_diff(struct vl_mpg12_bs *bs)
+{
+   const DCtab *entry;
+   int nbits;
+   int diff;
+
+   if (bs->vlc.buf >= 0xf8000000) {
+      /* long size code */
+      entry = &DC_long[vl_vlc_ubits(&bs->vlc, 9) - 0x1e0];
+      nbits = entry->size;
+      vl_vlc_dumpbits(&bs->vlc, entry->len);
+      vl_vlc_needbits(&bs->vlc);
+      diff = vl_vlc_ubits(&bs->vlc, nbits) - UBITS (SBITS (~bs->vlc.buf, 1), nbits);
+      vl_vlc_dumpbits(&bs->vlc, nbits);
+      return diff;
+   }
+
+   entry = &DC_lum_5[vl_vlc_ubits(&bs->vlc, 5)];
+   nbits = entry->size;
+
+   if (!nbits) {
+      /* size zero: only the 3 bit size code is present */
+      vl_vlc_dumpbits(&bs->vlc, 3);
+      return 0;
+   }
+
+   /* account for size code and differential at once, then shift manually */
+   bs->vlc.bits += entry->len + nbits;
+   bs->vlc.buf <<= entry->len;
+   diff = vl_vlc_ubits(&bs->vlc, nbits) - UBITS (SBITS (~bs->vlc.buf, 1), nbits);
+   bs->vlc.buf <<= nbits;
+
+   return diff;
+}
+
+/*
+ * Decode the DC coefficient differential of a chroma block.
+ *
+ * The size code is looked up in DC_chrom_5 (5 bits) or, for buffers at or
+ * above 0xf8000000, in DC_long (10 bits, index offset by 0x3e0). A size of
+ * zero means a differential of 0; otherwise "size" additional bits hold
+ * the differential.
+ */
+static inline int
+get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs)
+{
+   const DCtab *entry;
+   int nbits;
+   int diff;
+
+   if (bs->vlc.buf >= 0xf8000000) {
+      /* long size code, one bit longer than the stored table length */
+      entry = &DC_long[vl_vlc_ubits(&bs->vlc, 10) - 0x3e0];
+      nbits = entry->size;
+      vl_vlc_dumpbits(&bs->vlc, entry->len + 1);
+      vl_vlc_needbits(&bs->vlc);
+      diff = vl_vlc_ubits(&bs->vlc, nbits) - UBITS (SBITS (~bs->vlc.buf, 1), nbits);
+      vl_vlc_dumpbits(&bs->vlc, nbits);
+      return diff;
+   }
+
+   entry = &DC_chrom_5[vl_vlc_ubits(&bs->vlc, 5)];
+   nbits = entry->size;
+
+   if (!nbits) {
+      /* size zero: only the 2 bit size code is present */
+      vl_vlc_dumpbits(&bs->vlc, 2);
+      return 0;
+   }
+
+   /* account for size code and differential at once, then shift manually */
+   bs->vlc.bits += entry->len + nbits;
+   bs->vlc.buf <<= entry->len;
+   diff = vl_vlc_ubits(&bs->vlc, nbits) - UBITS (SBITS (~bs->vlc.buf, 1), nbits);
+   bs->vlc.buf <<= nbits;
+
+   return diff;
+}
+
+/*
+ * Decode the run/level coded coefficients of an intra block into dest,
+ * using the DCT_B14* lookup tables.
+ *
+ * The coefficient index i starts at 0 and is advanced by the run of every
+ * decoded code before the value is stored; each level is multiplied by
+ * quantizer_scale. Decoding stops at the end of block code or as soon as
+ * the index would reach 64, so dest (64 entries) is never overrun even on
+ * corrupt input. Two bits are dumped for the end of block code on exit.
+ */
+static inline void
+get_intra_block_B14(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
+{
+   int i, val;
+   const DCTtab *tab;
+
+   i = 0;
+
+   vl_vlc_needbits(&bs->vlc);
+
+   while (1) {
+      /* the magnitude of the bit buffer selects the lookup table */
+      if (bs->vlc.buf >= 0x28000000) {
+
+         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
+
+         i += tab->run;
+         if (i >= 64)
+            break;	/* end of block */
+
+      /* shared tail: the other table branches jump here with tab and i set */
+      normal_code:
+         bs->vlc.buf <<= tab->len;
+         bs->vlc.bits += tab->len + 1;
+         val = tab->level * quantizer_scale;
+
+         /* apply the sign bit that follows the code */
+         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
+
+         dest[i] = val;
+
+         bs->vlc.buf <<= 1;
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      } else if (bs->vlc.buf >= 0x04000000) {
+
+         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
+
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+
+         /* escape code */
+
+         /* undo the escape marker run (64) and add the explicit 6 bit run */
+         i += UBITS(bs->vlc.buf << 6, 6) - 64;
+         if (i >= 64)
+            break;	/* illegal, check needed to avoid buffer overflow */
+
+         vl_vlc_dumpbits(&bs->vlc, 12);
+         vl_vlc_needbits(&bs->vlc);
+         /* the level follows as a signed 12 bit value */
+         val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale;
+
+         dest[i] = val;
+
+         vl_vlc_dumpbits(&bs->vlc, 12);
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      } else if (bs->vlc.buf >= 0x02000000) {
+         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00800000) {
+         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00200000) {
+         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else {
+         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
+         bs->vlc.buf <<= 16;
+         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      }
+      break;	/* illegal, check needed to avoid buffer overflow */
+   }
+
+   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
+}
+
+/*
+ * Decode the run/level coded coefficients of an intra block into dest,
+ * using the DCT_B15* lookup tables (selected by the caller when
+ * picture->intra_vlc_format is set).
+ *
+ * The coefficient index i starts at 0 and is advanced by the run of every
+ * decoded code before the value is stored; each level is multiplied by
+ * quantizer_scale. Decoding stops as soon as the index would reach 64, so
+ * dest (64 entries) is never overrun. Four bits are dumped for the end of
+ * block code on exit.
+ */
+static inline void
+get_intra_block_B15(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
+{
+   int i, val;
+   const DCTtab * tab;
+
+   i = 0;
+
+   vl_vlc_needbits(&bs->vlc);
+
+   while (1) {
+      /* the magnitude of the bit buffer selects the lookup table */
+      if (bs->vlc.buf >= 0x04000000) {
+
+         tab = DCT_B15_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
+
+         i += tab->run;
+         if (i < 64) {
+
+         /* shared tail: the other table branches jump here with tab and i set */
+         normal_code:
+            bs->vlc.buf <<= tab->len;
+            bs->vlc.bits += tab->len + 1;
+            val = tab->level * quantizer_scale;
+
+            /* apply the sign bit that follows the code */
+            val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
+
+            dest[i] = val;
+
+            bs->vlc.buf <<= 1;
+            vl_vlc_needbits(&bs->vlc);
+
+            continue;
+
+         } else {
+
+            /* end of block. I commented out this code because if we */
+            /* dont exit here we will still exit at the later test :) */
+
+            /* if (i >= 128) break;	*/	/* end of block */
+
+            /* escape code */
+
+            /* undo the escape marker run (64) and add the explicit 6 bit run */
+            i += UBITS(bs->vlc.buf << 6, 6) - 64;
+            if (i >= 64)
+                break;	/* illegal, check against buffer overflow */
+
+            vl_vlc_dumpbits(&bs->vlc, 12);
+            vl_vlc_needbits(&bs->vlc);
+            /* the level follows as a signed 12 bit value */
+            val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale;
+
+            dest[i] = val;
+
+            vl_vlc_dumpbits(&bs->vlc, 12);
+            vl_vlc_needbits(&bs->vlc);
+
+            continue;
+
+          }
+      } else if (bs->vlc.buf >= 0x02000000) {
+         tab = DCT_B15_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00800000) {
+         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00200000) {
+         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else {
+         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
+         bs->vlc.buf <<= 16;
+         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      }
+      break;	/* illegal, check needed to avoid buffer overflow */
+   }
+
+   vl_vlc_dumpbits(&bs->vlc, 4);	/* dump end of block code */
+}
+
+/*
+ * Decode the run/level coded coefficients of a non-intra block into dest.
+ *
+ * The coefficient index i starts at -1 and is advanced by the run of every
+ * decoded code before the value is stored. The very first code is looked
+ * up in DCT_B14DC_5 instead of DCT_B14AC_5, which is why the function
+ * jumps into the middle of the loop body before the first iteration.
+ * Levels are reconstructed as ((2 * level + 1) * quantizer_scale) / 2.
+ * Decoding stops at the end of block code or as soon as the index would
+ * reach 64; two bits are dumped for the end of block code on exit.
+ */
+static inline void
+get_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
+{
+   int i, val;
+   const DCTtab *tab;
+
+   i = -1;
+
+   vl_vlc_needbits(&bs->vlc);
+   /* first coefficient: enter the loop body directly with the DC table */
+   if (bs->vlc.buf >= 0x28000000) {
+      tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
+      goto entry_1;
+   } else
+      goto entry_2;
+
+   while (1) {
+      if (bs->vlc.buf >= 0x28000000) {
+
+         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
+
+      entry_1:
+         i += tab->run;
+         if (i >= 64)
+            break;	/* end of block */
+
+      /* shared tail: the other table branches jump here with tab and i set */
+      normal_code:
+         bs->vlc.buf <<= tab->len;
+         bs->vlc.bits += tab->len + 1;
+         val = ((2*tab->level+1) * quantizer_scale) >> 1;
+
+         /* apply the sign bit that follows the code */
+         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
+
+         dest[i] = val;
+
+         bs->vlc.buf <<= 1;
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      }
+
+   entry_2:
+      if (bs->vlc.buf >= 0x04000000) {
+
+         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
+
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+
+         /* escape code */
+
+         /* undo the escape marker run (64) and add the explicit 6 bit run */
+         i += UBITS(bs->vlc.buf << 6, 6) - 64;
+         if (i >= 64)
+            break;	/* illegal, check needed to avoid buffer overflow */
+
+         vl_vlc_dumpbits(&bs->vlc, 12);
+         vl_vlc_needbits(&bs->vlc);
+         /* signed 12 bit level; the sbits(1) term is added before doubling */
+         val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1;
+         val = (val * quantizer_scale) / 2;
+
+         dest[i] = val;
+
+         vl_vlc_dumpbits(&bs->vlc, 12);
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      } else if (bs->vlc.buf >= 0x02000000) {
+         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00800000) {
+         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00200000) {
+         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else {
+         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
+         bs->vlc.buf <<= 16;
+         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      }
+      break;	/* illegal, check needed to avoid buffer overflow */
+   }
+   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
+}
+
+/*
+ * Decode the run/level coded coefficients of an MPEG-1 intra block into
+ * dest.
+ *
+ * Works like the B14 intra decoder but forces every reconstructed value to
+ * be odd ("oddification") and uses the MPEG-1 escape format: an 8 bit
+ * signed level that is extended by a further 8 bits when its low seven
+ * bits are all zero. Decoding stops at the end of block code or as soon as
+ * the index would reach 64; two bits are dumped for the end of block code
+ * on exit.
+ */
+static inline void
+get_mpeg1_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
+{
+   int i, val;
+   const DCTtab * tab;
+
+   i = 0;
+
+   vl_vlc_needbits(&bs->vlc);
+
+   while (1) {
+      /* the magnitude of the bit buffer selects the lookup table */
+      if (bs->vlc.buf >= 0x28000000) {
+
+         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
+
+         i += tab->run;
+         if (i >= 64)
+            break;	/* end of block */
+
+      /* shared tail: the other table branches jump here with tab and i set */
+      normal_code:
+         bs->vlc.buf <<= tab->len;
+         bs->vlc.bits += tab->len + 1;
+         val = tab->level * quantizer_scale;
+
+         /* oddification */
+         val = (val - 1) | 1;
+
+         /* if (bitstream_get (1)) val = -val; */
+         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
+
+         dest[i] = val;
+
+         bs->vlc.buf <<= 1;
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      } else if (bs->vlc.buf >= 0x04000000) {
+
+         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
+
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+
+         /* escape code */
+
+         /* undo the escape marker run (64) and add the explicit 6 bit run */
+         i += UBITS(bs->vlc.buf << 6, 6) - 64;
+         if (i >= 64)
+            break;	/* illegal, check needed to avoid buffer overflow */
+
+         vl_vlc_dumpbits(&bs->vlc, 12);
+         vl_vlc_needbits(&bs->vlc);
+         val = vl_vlc_sbits(&bs->vlc, 8);
+         /* low seven bits zero: the level continues in the next 8 bits */
+         if (! (val & 0x7f)) {
+            vl_vlc_dumpbits(&bs->vlc, 8);
+            val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
+         }
+         val = val * quantizer_scale;
+
+         /* oddification */
+         val = (val + ~SBITS (val, 1)) | 1;
+
+         dest[i] = val;
+
+         vl_vlc_dumpbits(&bs->vlc, 8);
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      } else if (bs->vlc.buf >= 0x02000000) {
+         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00800000) {
+         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00200000) {
+         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else {
+         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
+         bs->vlc.buf <<= 16;
+         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      }
+      break;	/* illegal, check needed to avoid buffer overflow */
+   }
+   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
+}
+
+/*
+ * Decode the run/level coded coefficients of an MPEG-1 non-intra block
+ * into dest.
+ *
+ * The coefficient index i starts at -1; the very first code is looked up
+ * in DCT_B14DC_5 instead of DCT_B14AC_5, which is why the function jumps
+ * into the middle of the loop body before the first iteration. Levels are
+ * reconstructed as ((2 * level + 1) * quantizer_scale) / 2 and then forced
+ * to be odd ("oddification"). Escapes use the MPEG-1 format: an 8 bit
+ * signed level that is extended by a further 8 bits when its low seven
+ * bits are all zero. Two bits are dumped for the end of block code on exit.
+ */
+static inline void
+get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
+{
+   int i, val;
+   const DCTtab * tab;
+
+   i = -1;
+
+   vl_vlc_needbits(&bs->vlc);
+   /* first coefficient: enter the loop body directly with the DC table */
+   if (bs->vlc.buf >= 0x28000000) {
+      tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
+      goto entry_1;
+   } else
+      goto entry_2;
+
+   while (1) {
+      if (bs->vlc.buf >= 0x28000000) {
+
+         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
+
+      entry_1:
+         i += tab->run;
+         if (i >= 64)
+            break;	/* end of block */
+
+      /* shared tail: the other table branches jump here with tab and i set */
+      normal_code:
+         bs->vlc.buf <<= tab->len;
+         bs->vlc.bits += tab->len + 1;
+         val = ((2*tab->level+1) * quantizer_scale) >> 1;
+
+         /* oddification */
+         val = (val - 1) | 1;
+
+         /* if (bitstream_get (1)) val = -val; */
+         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
+
+         dest[i] = val;
+
+         bs->vlc.buf <<= 1;
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      }
+
+   entry_2:
+      if (bs->vlc.buf >= 0x04000000) {
+
+         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
+
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+
+         /* escape code */
+
+         /* undo the escape marker run (64) and add the explicit 6 bit run */
+         i += UBITS(bs->vlc.buf << 6, 6) - 64;
+         if (i >= 64)
+            break;	/* illegal, check needed to avoid buffer overflow */
+
+         vl_vlc_dumpbits(&bs->vlc, 12);
+         vl_vlc_needbits(&bs->vlc);
+         val = vl_vlc_sbits(&bs->vlc, 8);
+         /* low seven bits zero: the level continues in the next 8 bits */
+         if (! (val & 0x7f)) {
+            vl_vlc_dumpbits(&bs->vlc, 8);
+            val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
+         }
+         val = 2 * (val + SBITS (val, 1)) + 1;
+         val = (val * quantizer_scale) / 2;
+
+         /* oddification */
+         val = (val + ~SBITS (val, 1)) | 1;
+
+         dest[i] = val;
+
+         vl_vlc_dumpbits(&bs->vlc, 8);
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      } else if (bs->vlc.buf >= 0x02000000) {
+         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00800000) {
+         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00200000) {
+         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else {
+         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
+         bs->vlc.buf <<= 16;
+         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      }
+      break;	/* illegal, check needed to avoid buffer overflow */
+   }
+   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
+}
+
+/*
+ * Decode one intra coded block of colour component cc and append it to the
+ * per-component output streams of bs.
+ *
+ * The block header (x, y, intra flag, DCT coding) goes into the current
+ * ycbcr_stream entry and the 64 coefficients into ycbcr_buffer. The DC
+ * predictor dc_dct_pred[cc] is updated with the decoded differential and
+ * stored as coefficient 0. For MPEG-1 D pictures no further coefficients
+ * are parsed.
+ */
+static inline void
+slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
+                 unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3])
+{
+   short coeffs[64];
+
+   memset(coeffs, 0, sizeof(int16_t) * 64);
+
+   /* block header */
+   bs->ycbcr_stream[cc]->x = x;
+   bs->ycbcr_stream[cc]->y = y;
+   bs->ycbcr_stream[cc]->intra = PIPE_MPEG12_DCT_INTRA;
+   bs->ycbcr_stream[cc]->coding = coding;
+
+   vl_vlc_needbits(&bs->vlc);
+
+   /* Get the intra DC coefficient and inverse quantize it */
+   dc_dct_pred[cc] += (cc == 0) ? get_luma_dc_dct_diff(bs)
+                                : get_chroma_dc_dct_diff(bs);
+   coeffs[0] = dc_dct_pred[cc];
+
+   /* remaining coefficients, decoder chosen by profile / vlc format */
+   if (picture->base.profile != PIPE_VIDEO_PROFILE_MPEG1) {
+      if (picture->intra_vlc_format)
+         get_intra_block_B15(bs, quantizer_scale, coeffs);
+      else
+         get_intra_block_B14(bs, quantizer_scale, coeffs);
+   } else if (picture->picture_coding_type != D_TYPE) {
+      get_mpeg1_intra_block(bs, quantizer_scale, coeffs);
+   }
+
+   memcpy(bs->ycbcr_buffer[cc], coeffs, sizeof(int16_t) * 64);
+
+   /* advance the output cursors of this component by one block */
+   bs->ycbcr_buffer[cc] += 64;
+   bs->ycbcr_stream[cc]++;
+   bs->num_ycbcr_blocks[cc]++;
+}
+
+/*
+ * Decode one non-intra (delta) block of colour component cc and append it
+ * to the per-component output streams of bs.
+ *
+ * The block header (x, y, delta flag, DCT coding) goes into the current
+ * ycbcr_stream entry and the 64 coefficients into ycbcr_buffer.
+ */
+static inline void
+slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
+                    unsigned x, unsigned y,  enum pipe_mpeg12_dct_type coding, int quantizer_scale)
+{
+   short coeffs[64];
+
+   memset(coeffs, 0, sizeof(int16_t) * 64);
+
+   /* block header */
+   bs->ycbcr_stream[cc]->x = x;
+   bs->ycbcr_stream[cc]->y = y;
+   bs->ycbcr_stream[cc]->intra = PIPE_MPEG12_DCT_DELTA;
+   bs->ycbcr_stream[cc]->coding = coding;
+
+   if (picture->base.profile != PIPE_VIDEO_PROFILE_MPEG1)
+      get_non_intra_block(bs, quantizer_scale, coeffs);
+   else
+      get_mpeg1_non_intra_block(bs, quantizer_scale, coeffs);
+
+   memcpy(bs->ycbcr_buffer[cc], coeffs, sizeof(int16_t) * 64);
+
+   /* advance the output cursors of this component by one block */
+   bs->ycbcr_buffer[cc] += 64;
+   bs->ycbcr_stream[cc]++;
+   bs->num_ycbcr_blocks[cc]++;
+}
+
+/*
+ * Parse an MPEG-1 motion vector. Both components use f_code[0] for the
+ * delta; the delta is additionally shifted left by f_code[1] and the result
+ * is bounded with f_code[0] + f_code[1]. The vector is stored in both the
+ * top and the bottom part of mv.
+ */
+static inline void
+motion_mp1(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   const unsigned bound_code = f_code[0] + f_code[1];
+   int vec_x, vec_y;
+
+   mv->bottom.field_select = PIPE_VIDEO_FRAME;
+   mv->top.field_select = PIPE_VIDEO_FRAME;
+
+   /* horizontal component */
+   vl_vlc_needbits(&bs->vlc);
+   vec_x = mv->top.x + (get_motion_delta(bs, f_code[0]) << f_code[1]);
+   vec_x = bound_motion_vector(vec_x, bound_code);
+   mv->bottom.x = vec_x;
+   mv->top.x = vec_x;
+
+   /* vertical component */
+   vl_vlc_needbits(&bs->vlc);
+   vec_y = mv->top.y + (get_motion_delta(bs, f_code[0]) << f_code[1]);
+   vec_y = bound_motion_vector(vec_y, bound_code);
+   mv->bottom.y = vec_y;
+   mv->top.y = vec_y;
+}
+
+/*
+ * Parse one motion vector and store it in both the top and the bottom part
+ * of mv, with the field select set to PIPE_VIDEO_FRAME. f_code[0] applies
+ * to the horizontal and f_code[1] to the vertical component.
+ */
+static inline void
+motion_fr_frame(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int vec_x, vec_y;
+
+   mv->bottom.field_select = PIPE_VIDEO_FRAME;
+   mv->top.field_select = PIPE_VIDEO_FRAME;
+
+   /* horizontal component */
+   vl_vlc_needbits(&bs->vlc);
+   vec_x = bound_motion_vector(mv->top.x + get_motion_delta(bs, f_code[0]), f_code[0]);
+   mv->bottom.x = vec_x;
+   mv->top.x = vec_x;
+
+   /* vertical component */
+   vl_vlc_needbits(&bs->vlc);
+   vec_y = bound_motion_vector(mv->top.y + get_motion_delta(bs, f_code[1]), f_code[1]);
+   mv->bottom.y = vec_y;
+   mv->top.y = vec_y;
+}
+
+/*
+ * Parse two motion vectors, one for the top and one for the bottom part of
+ * mv. Each vector is preceded by a one bit field select. The vertical
+ * component is predicted from half the stored value and stored doubled
+ * again; it is not passed through bound_motion_vector().
+ */
+static inline void
+motion_fr_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int vec_x, vec_y;
+
+   /* ---- first vector (top) ---- */
+   vl_vlc_needbits(&bs->vlc);
+   if (vl_vlc_ubits(&bs->vlc, 1))
+      mv->top.field_select = PIPE_VIDEO_BOTTOM_FIELD;
+   else
+      mv->top.field_select = PIPE_VIDEO_TOP_FIELD;
+   vl_vlc_dumpbits(&bs->vlc, 1);
+
+   vec_x = bound_motion_vector(mv->top.x + get_motion_delta(bs, f_code[0]), f_code[0]);
+   mv->top.x = vec_x;
+
+   vl_vlc_needbits(&bs->vlc);
+   vec_y = (mv->top.y >> 1) + get_motion_delta(bs, f_code[1]);
+   /* vec_y = bound_motion_vector (vec_y, f_code[1]); */
+   mv->top.y = vec_y << 1;
+
+   /* ---- second vector (bottom) ---- */
+   vl_vlc_needbits(&bs->vlc);
+   if (vl_vlc_ubits(&bs->vlc, 1))
+      mv->bottom.field_select = PIPE_VIDEO_BOTTOM_FIELD;
+   else
+      mv->bottom.field_select = PIPE_VIDEO_TOP_FIELD;
+   vl_vlc_dumpbits(&bs->vlc, 1);
+
+   vec_x = bound_motion_vector(mv->bottom.x + get_motion_delta(bs, f_code[0]), f_code[0]);
+   mv->bottom.x = vec_x;
+
+   vl_vlc_needbits(&bs->vlc);
+   vec_y = (mv->bottom.y >> 1) + get_motion_delta(bs, f_code[1]);
+   /* vec_y = bound_motion_vector (vec_y, f_code[1]); */
+   mv->bottom.y = vec_y << 1;
+}
+
+/*
+ * Parse the motion vector of a dual prime macroblock. Only the base vector
+ * is decoded and stored in both parts of mv; the differential vectors are
+ * not handled yet. The vertical component is predicted from half the
+ * stored value and stored doubled again, without bounding.
+ */
+static inline void
+motion_fr_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int vec_x, vec_y;
+
+   // TODO Implement dmv
+   mv->bottom.field_select = PIPE_VIDEO_FRAME;
+   mv->top.field_select = PIPE_VIDEO_FRAME;
+
+   /* horizontal component */
+   vl_vlc_needbits(&bs->vlc);
+   vec_x = bound_motion_vector(mv->top.x + get_motion_delta(bs, f_code[0]), f_code[0]);
+   mv->bottom.x = vec_x;
+   mv->top.x = vec_x;
+
+   /* vertical component */
+   vl_vlc_needbits(&bs->vlc);
+   vec_y = (mv->top.y >> 1) + get_motion_delta(bs, f_code[1]);
+   /* vec_y = bound_motion_vector (vec_y, f_code[1]); */
+   mv->bottom.y = vec_y << 1;
+   mv->top.y = mv->bottom.y;
+}
+
+/*
+ * Concealment motion vector of an intra macroblock: parsed like a frame
+ * motion vector and stored in both parts of mv, followed by one marker bit
+ * that is skipped.
+ */
+static inline void
+motion_fr_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int component;
+
+   mv->bottom.field_select = PIPE_VIDEO_FRAME;
+   mv->top.field_select = PIPE_VIDEO_FRAME;
+
+   /* horizontal component */
+   vl_vlc_needbits(&bs->vlc);
+   component = mv->top.x + get_motion_delta(bs, f_code[0]);
+   component = bound_motion_vector(component, f_code[0]);
+   mv->bottom.x = component;
+   mv->top.x = component;
+
+   /* vertical component */
+   vl_vlc_needbits(&bs->vlc);
+   component = mv->top.y + get_motion_delta(bs, f_code[1]);
+   component = bound_motion_vector(component, f_code[1]);
+   mv->bottom.y = component;
+   mv->top.y = component;
+
+   vl_vlc_dumpbits(&bs->vlc, 1); /* remove marker_bit */
+}
+
+/*
+ * Parse one motion vector that is preceded by a one bit reference field
+ * selector. The selector bit is skipped rather than evaluated; the field
+ * select of both parts of mv is set to PIPE_VIDEO_FRAME and the vector is
+ * stored in both parts.
+ */
+static inline void
+motion_fi_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int vec_x, vec_y;
+
+   vl_vlc_needbits(&bs->vlc);
+
+   // ref_field
+   //vl_vlc_ubits(&bs->vlc, 1);
+
+   // TODO field select may need to do something here for bob (weave ok)
+   mv->bottom.field_select = PIPE_VIDEO_FRAME;
+   mv->top.field_select = PIPE_VIDEO_FRAME;
+   vl_vlc_dumpbits(&bs->vlc, 1);
+
+   /* horizontal component */
+   vec_x = bound_motion_vector(mv->top.x + get_motion_delta(bs, f_code[0]), f_code[0]);
+   mv->bottom.x = vec_x;
+   mv->top.x = vec_x;
+
+   /* vertical component */
+   vl_vlc_needbits(&bs->vlc);
+   vec_y = bound_motion_vector(mv->top.y + get_motion_delta(bs, f_code[1]), f_code[1]);
+   mv->bottom.y = vec_y;
+   mv->top.y = vec_y;
+}
+
+/*
+ * Parse two motion vectors, the first stored in the top and the second in
+ * the bottom part of mv. Each is preceded by a one bit reference field
+ * selector that is skipped rather than evaluated; the field select is set
+ * to PIPE_VIDEO_FRAME for both.
+ */
+static inline void
+motion_fi_16x8(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int vec_x, vec_y;
+
+   /* ---- first vector (top) ---- */
+   vl_vlc_needbits(&bs->vlc);
+
+   // ref_field
+   //vl_vlc_ubits(&bs->vlc, 1);
+
+   // TODO field select may need to do something here bob  (weave ok)
+   mv->top.field_select = PIPE_VIDEO_FRAME;
+   vl_vlc_dumpbits(&bs->vlc, 1);
+
+   vec_x = bound_motion_vector(mv->top.x + get_motion_delta(bs, f_code[0]), f_code[0]);
+   mv->top.x = vec_x;
+
+   vl_vlc_needbits(&bs->vlc);
+   vec_y = bound_motion_vector(mv->top.y + get_motion_delta(bs, f_code[1]), f_code[1]);
+   mv->top.y = vec_y;
+
+   /* ---- second vector (bottom) ---- */
+   vl_vlc_needbits(&bs->vlc);
+
+   // ref_field
+   //vl_vlc_ubits(&bs->vlc, 1);
+
+   // TODO field select may need to do something here for bob (weave ok)
+   mv->bottom.field_select = PIPE_VIDEO_FRAME;
+   vl_vlc_dumpbits(&bs->vlc, 1);
+
+   vec_x = bound_motion_vector(mv->bottom.x + get_motion_delta(bs, f_code[0]), f_code[0]);
+   mv->bottom.x = vec_x;
+
+   vl_vlc_needbits(&bs->vlc);
+   vec_y = bound_motion_vector(mv->bottom.y + get_motion_delta(bs, f_code[1]), f_code[1]);
+   mv->bottom.y = vec_y;
+}
+
+/*
+ * Parse one motion vector and store it in both the top and the bottom part
+ * of mv, with the field select set to PIPE_VIDEO_FRAME. No differential
+ * vectors are read here.
+ */
+static inline void
+motion_fi_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int vec_x, vec_y;
+
+   // TODO field select may need to do something here for bob  (weave ok)
+   mv->bottom.field_select = PIPE_VIDEO_FRAME;
+   mv->top.field_select = PIPE_VIDEO_FRAME;
+
+   /* horizontal component */
+   vl_vlc_needbits(&bs->vlc);
+   vec_x = bound_motion_vector(mv->top.x + get_motion_delta(bs, f_code[0]), f_code[0]);
+   mv->bottom.x = vec_x;
+   mv->top.x = vec_x;
+
+   /* vertical component */
+   vl_vlc_needbits(&bs->vlc);
+   vec_y = bound_motion_vector(mv->top.y + get_motion_delta(bs, f_code[1]), f_code[1]);
+   mv->bottom.y = vec_y;
+   mv->top.y = vec_y;
+}
+
+
+/*
+ * Concealment motion vector variant that starts with a one bit field
+ * select (skipped), then parses one vector into both parts of mv and
+ * finally skips the trailing marker bit. The field select members of mv
+ * are left untouched.
+ */
+static inline void
+motion_fi_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int component;
+
+   vl_vlc_needbits(&bs->vlc);
+   vl_vlc_dumpbits(&bs->vlc, 1); /* remove field_select */
+
+   /* horizontal component */
+   component = mv->top.x + get_motion_delta(bs, f_code[0]);
+   component = bound_motion_vector(component, f_code[0]);
+   mv->bottom.x = component;
+   mv->top.x = component;
+
+   /* vertical component */
+   vl_vlc_needbits(&bs->vlc);
+   component = mv->top.y + get_motion_delta(bs, f_code[1]);
+   component = bound_motion_vector(component, f_code[1]);
+   mv->bottom.y = component;
+   mv->top.y = component;
+
+   vl_vlc_dumpbits(&bs->vlc, 1); /* remove marker_bit */
+}
+
+/*
+ * Run a motion vector parsing routine for every prediction direction that
+ * is flagged in macroblock_modes: forward prediction passes
+ * picture->f_code[0] and &mv_fwd, backward prediction passes
+ * picture->f_code[1] and &mv_bwd.
+ *
+ * The macro relies on bs, picture, mv_fwd and mv_bwd being visible in the
+ * scope where it is expanded.
+ */
+#define MOTION_CALL(routine, macroblock_modes)		\
+do {							\
+   if ((macroblock_modes) & MACROBLOCK_MOTION_FORWARD)  \
+      routine(bs, picture->f_code[0], &mv_fwd);         \
+   if ((macroblock_modes) & MACROBLOCK_MOTION_BACKWARD)	\
+      routine(bs, picture->f_code[1], &mv_bwd);         \
+} while (0)
+
+/*
+ * Write the forward and backward motion vectors of the current macroblock
+ * to slot *mv_pos of mv_stream[0] and mv_stream[1] and advance *mv_pos.
+ *
+ * When the top field select of a vector is PIPE_VIDEO_FRAME, the top part
+ * is stored for the bottom part as well; otherwise top and bottom are
+ * stored as they are.
+ */
+static inline void
+store_motionvectors(struct vl_mpg12_bs *bs, unsigned *mv_pos,
+                    struct pipe_motionvector *mv_fwd,
+                    struct pipe_motionvector *mv_bwd)
+{
+   struct pipe_motionvector *sources[2];
+   const unsigned slot = *mv_pos;
+   unsigned dir;
+
+   sources[0] = mv_fwd;
+   sources[1] = mv_bwd;
+
+   for (dir = 0; dir < 2; ++dir) {
+      struct pipe_motionvector *src = sources[dir];
+
+      bs->mv_stream[dir][slot].top = src->top;
+
+      if (src->top.field_select == PIPE_VIDEO_FRAME)
+         bs->mv_stream[dir][slot].bottom = src->top;
+      else
+         bs->mv_stream[dir][slot].bottom = src->bottom;
+   }
+
+   *mv_pos = slot + 1;
+}
+
+/*
+ * Locate the next slice start code and parse the slice header up to and
+ * including the first macroblock address increment.
+ *
+ * On success *quantizer_scale holds the quantizer scale of the slice,
+ * *x and *y the position of its first macroblock and *mv_pos the matching
+ * index (x + y * bs->width).
+ *
+ * Returns false when vl_vlc_getbyte() fails before a slice start code has
+ * been found, when the macroblock address increment is invalid, or when
+ * the resulting position lies outside the picture.
+ */
+static inline bool
+slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
+           int *quantizer_scale, unsigned *x, unsigned *y, unsigned *mv_pos)
+{
+   const MBAtab * mba;
+
+   vl_vlc_need32bits(&bs->vlc);
+   /* advance byte by byte until the buffer holds a code in 0x101..0x1AF */
+   while (bs->vlc.buf < 0x101 || bs->vlc.buf > 0x1AF) {
+      if (!vl_vlc_getbyte(&bs->vlc))
+         return false;
+   }
+   /* the low byte of the start code is the slice row, counted from 1 */
+   *y = (bs->vlc.buf & 0xFF) - 1;
+   vl_vlc_restart(&bs->vlc);
+
+   *quantizer_scale = get_quantizer_scale(bs, picture);
+
+   /* ignore intra_slice and all the extra data */
+   while (bs->vlc.buf & 0x80000000) {
+      vl_vlc_dumpbits(&bs->vlc, 9);
+      vl_vlc_needbits(&bs->vlc);
+   }
+
+   /* decode initial macroblock address increment */
+   *x = 0;
+   while (1) {
+      if (bs->vlc.buf >= 0x08000000) {
+          mba = MBA_5 + (vl_vlc_ubits(&bs->vlc, 6) - 2);
+          break;
+      } else if (bs->vlc.buf >= 0x01800000) {
+          mba = MBA_11 + (vl_vlc_ubits(&bs->vlc, 12) - 24);
+          break;
+      } else switch (vl_vlc_ubits(&bs->vlc, 12)) {
+      case 8:		/* macroblock_escape */
+          *x += 33;
+          vl_vlc_dumpbits(&bs->vlc, 11);
+          vl_vlc_needbits(&bs->vlc);
+          continue;
+      case 15:	/* macroblock_stuffing (MPEG1 only) */
+          bs->vlc.buf &= 0xfffff;
+          vl_vlc_dumpbits(&bs->vlc, 11);
+          vl_vlc_needbits(&bs->vlc);
+          continue;
+      default:	/* error */
+          return false;
+      }
+   }
+   vl_vlc_dumpbits(&bs->vlc, mba->len + 1);
+   *x += mba->mba;
+
+   /* an increment past the end of the row continues on the following rows */
+   while (*x >= bs->width) {
+      *x -= bs->width;
+      (*y)++;
+   }
+
+   /*
+    * Valid rows are 0 .. height - 1. Accepting y == height (as a plain
+    * "greater than" test would) lets mv_pos point one full row past the
+    * last valid row.
+    * NOTE(review): assumes bs->height counts macroblock rows, as the
+    * x + y * width indexing below implies -- confirm where it is set.
+    */
+   if (*y >= bs->height)
+      return false;
+
+   *mv_pos = *x + *y * bs->width;
+
+   return true;
+}
+/* Decode one slice: slice_init(), then macroblock after macroblock; true when a non-MBA code ends the slice, false if slice_init() fails or y reaches bs->height. */
+static inline bool
+decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
+{
+   enum pipe_video_field_select default_field_select;
+   struct pipe_motionvector mv_fwd, mv_bwd;
+   enum pipe_mpeg12_dct_type dct_type;
+
+   /* predictor for DC coefficients in intra blocks */
+   int dc_dct_pred[3] = { 0, 0, 0 };
+   int quantizer_scale;
+
+   unsigned x, y, mv_pos;
+   /* field pictures default to their own field, anything else to frame selection */
+   switch(picture->picture_structure) {
+   case TOP_FIELD:
+      default_field_select = PIPE_VIDEO_TOP_FIELD;
+      break;
+
+   case BOTTOM_FIELD:
+      default_field_select = PIPE_VIDEO_BOTTOM_FIELD;
+      break;
+
+   default:
+      default_field_select = PIPE_VIDEO_FRAME;
+      break;
+   }
+
+   if (!slice_init(bs, picture, &quantizer_scale, &x, &y, &mv_pos))
+      return false;
+
+   mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
+   mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select;
+
+   mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
+   mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select;
+   /* each iteration decodes one coded macroblock and then the address increment that follows it */
+   while (1) {
+      int macroblock_modes;
+      int mba_inc;
+      const MBAtab * mba;
+
+      vl_vlc_needbits(&bs->vlc);
+
+      macroblock_modes = get_macroblock_modes(bs, picture);
+      dct_type = get_dct_type(bs, picture, macroblock_modes);
+      /* weights: half/half when both directions are coded, otherwise everything on one direction */
+      switch(macroblock_modes & (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD)) {
+      case (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD):
+         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_HALF;
+         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_HALF;
+         break;
+
+      default:
+         mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select;
+         mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select;
+
+         /* fall through */
+      case MACROBLOCK_MOTION_FORWARD:
+         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
+         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
+         break;
+
+      case MACROBLOCK_MOTION_BACKWARD:
+         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
+         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
+         break;
+      }
+
+      /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */
+      if (macroblock_modes & MACROBLOCK_QUANT)
+         quantizer_scale = get_quantizer_scale(bs, picture);
+
+      if (macroblock_modes & MACROBLOCK_INTRA) {
+
+         if (picture->concealment_motion_vectors) {
+            if (picture->picture_structure == FRAME_PICTURE)
+               motion_fr_conceal(bs, picture->f_code[0], &mv_fwd);
+            else
+               motion_fi_conceal(bs, picture->f_code[0], &mv_fwd);
+
+         } else {
+            mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
+            mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
+         }
+         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
+         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
+
+         // unrolled loop of the 6 block(i) calls in macroblock(): four component-0 blocks, then components 1 and 2
+         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
+
+         if (picture->picture_coding_type == D_TYPE) {
+            vl_vlc_needbits(&bs->vlc);
+            vl_vlc_dumpbits(&bs->vlc, 1);
+         }
+
+      } else {
+         if (picture->picture_structure == FRAME_PICTURE)
+            switch (macroblock_modes & MOTION_TYPE_MASK) {
+            case MC_FRAME:
+               if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) {
+                  MOTION_CALL(motion_mp1, macroblock_modes);
+               } else {
+                  MOTION_CALL(motion_fr_frame, macroblock_modes);
+               }
+               break;
+
+            case MC_FIELD:
+               MOTION_CALL (motion_fr_field, macroblock_modes);
+               break;
+
+            case MC_DMV:
+               MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD);
+               break;
+
+            case 0:
+               /* non-intra mb without forward mv in a P picture */
+               mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
+               mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
+               break;
+            }
+         else
+            switch (macroblock_modes & MOTION_TYPE_MASK) {
+            case MC_FIELD:
+               MOTION_CALL (motion_fi_field, macroblock_modes);
+               break;
+
+            case MC_16X8:
+               MOTION_CALL (motion_fi_16x8, macroblock_modes);
+               break;
+
+            case MC_DMV:
+               MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD);
+               break;
+
+            case 0:
+               /* non-intra mb without forward mv in a P picture */
+               mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
+               mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
+               break;
+            }
+
+         if (macroblock_modes & MACROBLOCK_PATTERN) {
+            int coded_block_pattern = get_coded_block_pattern(bs);
+
+            // TODO  optimize not fully used for idct accel only mc.
+            if (coded_block_pattern & 0x20)
+               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0 luma 0
+            if (coded_block_pattern & 0x10)
+               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1
+            if (coded_block_pattern & 0x08)
+               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2
+            if (coded_block_pattern & 0x04)
+               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3
+            if (coded_block_pattern & 0x2)
+               slice_non_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 chroma
+            if (coded_block_pattern & 0x1)
+               slice_non_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 chroma
+         }
+
+         dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
+      }
+
+      store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd);
+      if (++x >= bs->width) {
+         ++y;
+         if (y >= bs->height)
+            return false;
+         x -= bs->width;
+      }
+      /* parse the next address increment; mba_inc ends up as the number of macroblocks to step over */
+      vl_vlc_needbits(&bs->vlc);
+      mba_inc = 0;
+      while (1) {
+         if (bs->vlc.buf >= 0x10000000) {
+            mba = MBA_5 + (vl_vlc_ubits(&bs->vlc, 5) - 2);
+            break;
+         } else if (bs->vlc.buf >= 0x03000000) {
+            mba = MBA_11 + (vl_vlc_ubits(&bs->vlc, 11) - 24);
+            break;
+         } else switch (vl_vlc_ubits(&bs->vlc, 11)) {
+         case 8:		/* macroblock_escape */
+            mba_inc += 33;
+            /* fall through */
+         case 15:	/* macroblock_stuffing (MPEG1 only) */
+            vl_vlc_dumpbits(&bs->vlc, 11);
+            vl_vlc_needbits(&bs->vlc);
+            continue;
+         default:	/* end of slice, or error */
+            return true;
+         }
+      }
+      vl_vlc_dumpbits(&bs->vlc, mba->len);
+      mba_inc += mba->mba;
+      if (mba_inc) {
+         //TODO  conversion to signed format
+         dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
+
+         mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select;
+         mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select;
+
+         if (picture->picture_coding_type == P_TYPE) {
+            mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
+            mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
+         }
+
+         x += mba_inc;
+         do {
+            store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd);
+         } while (--mba_inc);
+      }
+      while (x >= bs->width) {
+         ++y;
+         if (y >= bs->height)
+            return false;
+         x -= bs->width;
+      }
+   }
+}
+
+/* Zero the whole decoder state, then record the picture width and height it will work on. */
+void
+vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height)
+{
+   assert(bs);
+
+   memset(bs, 0, sizeof *bs);
+   bs->height = height;
+   bs->width = width;
+}
+
+/* Attach the output arrays to bs and reset every entry of motion-vector streams 0 and 1. */
+void
+vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES],
+                        short *ycbcr_buffer[VL_MAX_PLANES], struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES])
+{
+   unsigned plane, ref, mb, num_mbs;
+
+   assert(bs);
+   assert(ycbcr_stream && ycbcr_buffer);
+   assert(mv_stream);
+
+   for (plane = 0; plane < VL_MAX_PLANES; ++plane) {
+      bs->ycbcr_stream[plane] = ycbcr_stream[plane];
+      bs->ycbcr_buffer[plane] = ycbcr_buffer[plane];
+   }
+   for (ref = 0; ref < VL_MAX_REF_FRAMES; ++ref)
+      bs->mv_stream[ref] = mv_stream[ref];
+
+   // TODO
+   num_mbs = bs->width * bs->height;
+   for (mb = 0; mb < num_mbs; ++mb) {
+      struct pipe_motionvector *fwd = &bs->mv_stream[0][mb];
+      struct pipe_motionvector *bwd = &bs->mv_stream[1][mb];
+
+      /* stream 0: zero vector, frame select, maximum weight */
+      fwd->top.x = fwd->top.y = fwd->bottom.x = fwd->bottom.y = 0;
+      fwd->top.field_select = fwd->bottom.field_select = PIPE_VIDEO_FRAME;
+      fwd->top.weight = fwd->bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
+      /* stream 1: zero vector, frame select, minimum weight */
+      bwd->top.x = bwd->top.y = bwd->bottom.x = bwd->bottom.y = 0;
+      bwd->top.field_select = bwd->bottom.field_select = PIPE_VIDEO_FRAME;
+      bwd->top.weight = bwd->bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
+   }
+}
+
+/* Point the VLC reader at buffer and call decode_slice() repeatedly until it returns false. */
+void
+vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
+                   struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3])
+{
+   assert(bs);
+   assert(num_ycbcr_blocks);
+   assert(buffer && num_bytes);
+
+   bs->num_ycbcr_blocks = num_ycbcr_blocks;
+   vl_vlc_init(&bs->vlc, buffer, num_bytes);
+
+   do { /* one slice per iteration */ } while (decode_slice(bs, picture));
+}
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
new file mode 100644
index 00000000000..4e48a9faa2f
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
@@ -0,0 +1,59 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_mpeg12_bitstream_h
+#define vl_mpeg12_bitstream_h
+
+#include "vl_defines.h"
+#include "vl_vlc.h"
+/* State of the MPEG-1/2 bitstream decoder; zeroed by vl_mpg12_bs_init(). */
+struct vl_mpg12_bs
+{
+   unsigned width, height; /* stored by vl_mpg12_bs_init(); decode_slice() wraps x at width and stops at height */
+
+   struct vl_vlc vlc; /* bit reader, re-initialised on every vl_mpg12_bs_decode() call */
+
+   unsigned *num_ycbcr_blocks; /* pointer passed to vl_mpg12_bs_decode(); stored, not copied */
+
+   struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES]; /* per-plane outputs set by vl_mpg12_bs_set_buffers() */
+   short *ycbcr_buffer[VL_MAX_PLANES];
+
+   struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]; /* set (and entries 0/1 reset) by vl_mpg12_bs_set_buffers() */
+};
+/* Zero *bs and store the given width and height. */
+void
+vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height);
+/* Store the output array pointers in bs and reset all width*height entries of mv_stream[0] and mv_stream[1]. */
+void
+vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES],
+                        short *ycbcr_buffer[VL_MAX_PLANES], struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]);
+/* Decode slices from buffer (num_bytes long, must be non-NULL and non-empty) until slice decoding reports failure. */
+void
+vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
+                   struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3]);
+
+#endif /* vl_mpeg12_bitstream_h */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
new file mode 100644
index 00000000000..98b0adabb2b
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -0,0 +1,968 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <math.h>
+#include <assert.h>
+
+#include <util/u_memory.h>
+#include <util/u_rect.h>
+#include <util/u_video.h>
+
+#include "vl_mpeg12_decoder.h"
+#include "vl_defines.h"
+
+#define SCALE_FACTOR_SNORM (32768.0f / 256.0f)
+#define SCALE_FACTOR_SSCALED (1.0f / 256.0f)
+/* One combination of intermediate source formats and scale factors; tables of these are searched by find_format_config(). */
+struct format_config {
+   enum pipe_format zscan_source_format;
+   enum pipe_format idct_source_format; /* PIPE_FORMAT_NONE in mc_format_config */
+   enum pipe_format mc_source_format;
+
+   float idct_scale; /* 0.0f in mc_format_config, 1.0f in the other tables */
+   float mc_scale; /* SCALE_FACTOR_SSCALED or SCALE_FACTOR_SNORM, matching the zscan source format */
+};
+/* Format combinations named for the bitstream entrypoint; the rows are currently identical to idct_format_config. */
+static const struct format_config bitstream_format_config[] = {
+   { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SSCALED },
+   { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, 1.0f, SCALE_FACTOR_SSCALED },
+   { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SNORM },
+   { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, 1.0f, SCALE_FACTOR_SNORM }
+};
+
+static const unsigned num_bitstream_format_configs =
+   sizeof(bitstream_format_config) / sizeof(struct format_config);
+/* Format combinations named for the IDCT entrypoint: SSCALED rows first, then SNORM; each with a FLOAT and a same-type MC source. */
+static const struct format_config idct_format_config[] = {
+   { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SSCALED },
+   { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, 1.0f, SCALE_FACTOR_SSCALED },
+   { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SNORM },
+   { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, 1.0f, SCALE_FACTOR_SNORM }
+};
+
+static const unsigned num_idct_format_configs =
+   sizeof(idct_format_config) / sizeof(struct format_config);
+/* Format combinations named for the MC entrypoint: no IDCT source format; only the SNORM row is enabled. */
+static const struct format_config mc_format_config[] = {
+   //{ PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_NONE, PIPE_FORMAT_R16_SSCALED, 0.0f, SCALE_FACTOR_SSCALED },
+   { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_NONE, PIPE_FORMAT_R16_SNORM, 0.0f, SCALE_FACTOR_SNORM }
+};
+
+static const unsigned num_mc_format_configs =
+   sizeof(mc_format_config) / sizeof(struct format_config);
+
+/* Create the buffer's zscan source video buffer and initialise one zscan buffer per plane; false on failure. */
+static bool
+init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
+{
+   struct vl_mpeg12_decoder *dec;
+   struct pipe_sampler_view **src_views;
+   struct pipe_surface **dst_surfaces;
+   enum pipe_format plane_formats[3];
+   unsigned plane;
+
+   assert(buffer);
+
+   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
+
+   /* all three planes use the decoder's zscan source format */
+   plane_formats[0] = dec->zscan_source_format;
+   plane_formats[1] = dec->zscan_source_format;
+   plane_formats[2] = dec->zscan_source_format;
+   buffer->zscan_source = vl_video_buffer_create_ex
+   (
+      dec->base.context,
+      dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT,
+      align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line,
+      1, PIPE_VIDEO_CHROMA_FORMAT_444, plane_formats, PIPE_USAGE_STATIC
+   );
+
+   if (!buffer->zscan_source)
+      return false;
+
+   src_views = buffer->zscan_source->get_sampler_view_planes(buffer->zscan_source);
+   if (!src_views)
+      goto error_destroy_source;
+
+   /* destination surfaces come from idct_source for entrypoints up to IDCT, otherwise from mc_source */
+   dst_surfaces = dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ?
+      dec->idct_source->get_surfaces(dec->idct_source) :
+      dec->mc_source->get_surfaces(dec->mc_source);
+   if (!dst_surfaces)
+      goto error_destroy_source;
+
+   for (plane = 0; plane < VL_MAX_PLANES; ++plane) {
+      if (!vl_zscan_init_buffer(plane == 0 ? &dec->zscan_y : &dec->zscan_c,
+                                &buffer->zscan[plane], src_views[plane], dst_surfaces[plane]))
+         goto error_cleanup_planes;
+   }
+
+   return true;
+
+error_cleanup_planes:
+   /* undo only the planes initialised before the failing one, last first */
+   while (plane > 0)
+      vl_zscan_cleanup_buffer(&buffer->zscan[--plane]);
+
+error_destroy_source:
+   buffer->zscan_source->destroy(buffer->zscan_source);
+
+   return false;
+}
+
+/* Clean up the zscan buffer of every plane, then destroy the zscan source video buffer. */
+static void
+cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer)
+{
+   unsigned plane = 0;
+
+   assert(buffer);
+   while (plane < VL_MAX_PLANES)
+      vl_zscan_cleanup_buffer(&buffer->zscan[plane++]);
+   buffer->zscan_source->destroy(buffer->zscan_source);
+}
+
+/* Initialise one IDCT buffer per plane from the idct_source and mc_source sampler views; false on failure. */
+static bool
+init_idct_buffer(struct vl_mpeg12_buffer *buffer)
+{
+   struct vl_mpeg12_decoder *dec;
+
+   struct pipe_sampler_view **src_views;
+   struct pipe_sampler_view **mc_views;
+   unsigned plane;
+
+   assert(buffer);
+
+   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
+
+   src_views = dec->idct_source->get_sampler_view_planes(dec->idct_source);
+   if (!src_views)
+      return false;
+
+   mc_views = dec->mc_source->get_sampler_view_planes(dec->mc_source);
+   if (!mc_views)
+      return false;
+
+   /* plane 0 uses dec->idct_y, the remaining planes share dec->idct_c */
+   for (plane = 0; plane < 3; ++plane) {
+      if (vl_idct_init_buffer(plane == 0 ? &dec->idct_y : &dec->idct_c,
+                              &buffer->idct[plane], src_views[plane],
+                              mc_views[plane]))
+         continue;
+
+      /* roll back the planes initialised so far, last first */
+      while (plane > 0)
+         vl_idct_cleanup_buffer(&buffer->idct[--plane]);
+      return false;
+   }
+
+   return true;
+}
+
+/*
+ * Release the IDCT buffer of each of the three planes set up by init_idct_buffer().
+ */
+static void
+cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
+{
+   unsigned i;
+
+   assert(buf);
+   assert(buf->base.decoder);
+
+   for (i = 0; i < 3; ++i)
+      vl_idct_cleanup_buffer(&buf->idct[i]);
+}
+
+/*
+ * Initialise the three MC buffers: mc[0] from dec->mc_y, mc[1] and mc[2] from dec->mc_c.
+ * On failure the buffers initialised so far are cleaned up again and false is returned.
+ */
+static bool
+init_mc_buffer(struct vl_mpeg12_buffer *buf)
+{
+   struct vl_mpeg12_decoder *dec;
+
+   assert(buf);
+
+   dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
+   assert(dec);
+
+   if (!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0]))
+      return false;
+
+   if (!vl_mc_init_buffer(&dec->mc_c, &buf->mc[1])) {
+      vl_mc_cleanup_buffer(&buf->mc[0]);
+      return false;
+   }
+
+   if (!vl_mc_init_buffer(&dec->mc_c, &buf->mc[2])) {
+      vl_mc_cleanup_buffer(&buf->mc[1]);
+      vl_mc_cleanup_buffer(&buf->mc[0]);
+      return false;
+   }
+
+   return true;
+}
+
+/* Clean up the MC buffer of every plane, in ascending plane order. */
+static void
+cleanup_mc_buffer(struct vl_mpeg12_buffer *buf)
+{
+   unsigned plane = 0;
+
+   assert(buf);
+   while (plane < VL_MAX_PLANES)
+      vl_mc_cleanup_buffer(&buf->mc[plane++]);
+}
+
+/* Buffer destroy hook: release the zscan, IDCT (if present), MC and vertex-stream state, then the buffer. */
+static void
+vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_decoder *dec;
+   bool has_idct;
+
+   assert(buf);
+
+   dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
+   assert(dec);
+
+   /* IDCT buffers are only set up for entrypoints up to IDCT (see vl_mpeg12_create_buffer) */
+   has_idct = dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT;
+   cleanup_zscan_buffer(buf);
+   if (has_idct)
+      cleanup_idct_buffer(buf);
+   cleanup_mc_buffer(buf);
+   vl_vb_cleanup(&buf->vertex_stream);
+   FREE(buf);
+}
+
+/*
+ * begin_frame hook: map the vertex stream and every plane of the zscan source for writing,
+ * then either attach the mapped arrays to the bitstream decoder or select the linear layout.
+ */
+static void
+vl_mpeg12_buffer_begin_frame(struct pipe_video_decode_buffer *buffer)
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_decoder *dec;
+   struct pipe_context *pipe;
+   struct pipe_sampler_view **sampler_views;
+   unsigned i;
+
+   assert(buf);
+
+   dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
+   assert(dec);
+   pipe = dec->base.context;
+
+   vl_vb_map(&buf->vertex_stream, pipe);
+
+   sampler_views = buf->zscan_source->get_sampler_view_planes(buf->zscan_source);
+   assert(sampler_views);
+
+   /* map the full level-0 rectangle of each plane's texture, discarding its old contents */
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      struct pipe_resource *tex = sampler_views[i]->texture;
+      struct pipe_box rect = { 0, 0, 0, tex->width0, tex->height0, 1 };
+
+      buf->tex_transfer[i] = pipe->get_transfer
+      (
+         pipe, tex,
+         0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+         &rect
+      );
+
+      buf->texels[i] = pipe->transfer_map(pipe, buf->tex_transfer[i]);
+   }
+
+   /* every entrypoint except bitstream selects the linear zscan layout here */
+   if (dec->base.entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+      for (i = 0; i < VL_MAX_PLANES; ++i)
+         vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear);
+   } else {
+      struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
+      struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES];
+
+      /* gather the vertex-stream arrays the bitstream decoder writes into */
+      for (i = 0; i < VL_MAX_PLANES; ++i)
+         ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i);
+
+      for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
+         mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i);
+
+      vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream);
+   }
+}
+
+/* set_quant_matrix hook: pass both quantiser matrices to the zscan buffer of every plane. */
+static void
+vl_mpeg12_buffer_set_quant_matrix(struct pipe_video_decode_buffer *buffer,
+                                  const uint8_t intra_matrix[64],
+                                  const uint8_t non_intra_matrix[64])
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   unsigned plane = 0;
+   while (plane < VL_MAX_PLANES)
+      vl_zscan_upload_quant(&buf->zscan[plane++], intra_matrix, non_intra_matrix);
+}
+
+/* get_ycbcr_stream hook: return the vertex stream's YCbCr block array for the given component. */
+static struct pipe_ycbcr_block *
+vl_mpeg12_buffer_get_ycbcr_stream(struct pipe_video_decode_buffer *buffer, int component)
+{
+   struct vl_mpeg12_buffer *mpeg12_buf = (struct vl_mpeg12_buffer*)buffer;
+
+   assert(mpeg12_buf);
+   return vl_vb_get_ycbcr_stream(&mpeg12_buf->vertex_stream, component);
+}
+
+/* get_ycbcr_buffer hook: return the texel pointer stored by begin_frame for the given component. */
+static short *
+vl_mpeg12_buffer_get_ycbcr_buffer(struct pipe_video_decode_buffer *buffer, int component)
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+
+   assert(buf);
+   assert(component >= 0 && component < VL_MAX_PLANES); /* component is signed: reject negatives too */
+   return buf->texels[component];
+}
+
+/* get_mv_stream_stride hook: return the stride reported by the buffer's vertex stream. */
+static unsigned
+vl_mpeg12_buffer_get_mv_stream_stride(struct pipe_video_decode_buffer *buffer)
+{
+   struct vl_mpeg12_buffer *mpeg12_buf = (struct vl_mpeg12_buffer*)buffer;
+
+   assert(mpeg12_buf);
+   return vl_vb_get_mv_stream_stride(&mpeg12_buf->vertex_stream);
+}
+
+/* get_mv_stream hook: return the vertex stream's motion-vector array for the given reference frame. */
+static struct pipe_motionvector *
+vl_mpeg12_buffer_get_mv_stream(struct pipe_video_decode_buffer *buffer, int ref_frame)
+{
+   struct vl_mpeg12_buffer *mpeg12_buf = (struct vl_mpeg12_buffer*)buffer;
+
+   assert(mpeg12_buf);
+   return vl_vb_get_mv_stream(&mpeg12_buf->vertex_stream, ref_frame);
+}
+
+/* decode_bitstream hook: select the normal or alternate zscan layout for the picture, then decode the data. */
+static void
+vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
+                                  unsigned num_bytes, const void *data,
+                                  struct pipe_mpeg12_picture_desc *picture,
+                                  unsigned num_ycbcr_blocks[3])
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_decoder *dec;
+   struct pipe_sampler_view *layout;
+   unsigned plane;
+
+   assert(buf);
+   dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
+   assert(dec);
+   layout = picture->alternate_scan ? dec->zscan_alternate : dec->zscan_normal;
+   for (plane = 0; plane < VL_MAX_PLANES; ++plane)
+      vl_zscan_set_layout(&buf->zscan[plane], layout);
+   vl_mpg12_bs_decode(&buf->bs, num_bytes, data, picture, num_ycbcr_blocks);
+}
+
+/* end_frame hook: unmap the vertex stream, then unmap and destroy the texture transfer of every plane. */
+static void
+vl_mpeg12_buffer_end_frame(struct pipe_video_decode_buffer *buffer)
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_decoder *dec;
+   struct pipe_context *pipe;
+   unsigned plane;
+
+   assert(buf);
+   dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
+   assert(dec);
+   pipe = dec->base.context;
+   vl_vb_unmap(&buf->vertex_stream, pipe);
+   for (plane = 0; plane < VL_MAX_PLANES; ++plane) {
+      pipe->transfer_unmap(pipe, buf->tex_transfer[plane]);
+      pipe->transfer_destroy(pipe, buf->tex_transfer[plane]);
+   }
+}
+
+/* Destroy the decoder: unbind shaders, delete its state objects, clean up every stage and free it. */
+static void
+vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
+   struct pipe_context *pipe;
+
+   assert(decoder);
+   pipe = dec->base.context;
+   /* Asserted in softpipe_delete_fs_state() for some reason */
+   pipe->bind_vs_state(pipe, NULL);
+   pipe->bind_fs_state(pipe, NULL);
+   pipe->delete_depth_stencil_alpha_state(pipe, dec->dsa);
+   pipe->delete_sampler_state(pipe, dec->sampler_ycbcr);
+
+   vl_mc_cleanup(&dec->mc_y);
+   vl_mc_cleanup(&dec->mc_c);
+   dec->mc_source->destroy(dec->mc_source);
+
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
+      vl_idct_cleanup(&dec->idct_y);
+      vl_idct_cleanup(&dec->idct_c);
+      dec->idct_source->destroy(dec->idct_source);
+   }
+
+   vl_zscan_cleanup(&dec->zscan_y);
+   vl_zscan_cleanup(&dec->zscan_c);
+
+   pipe->delete_vertex_elements_state(pipe, dec->ves_ycbcr);
+   pipe->delete_vertex_elements_state(pipe, dec->ves_mv);
+   pipe_resource_reference(&dec->quads.buffer, NULL);
+   pipe_resource_reference(&dec->pos.buffer, NULL);
+   pipe_resource_reference(&dec->block_num.buffer, NULL);
+
+   pipe_sampler_view_reference(&dec->zscan_linear, NULL);
+   pipe_sampler_view_reference(&dec->zscan_normal, NULL);
+   pipe_sampler_view_reference(&dec->zscan_alternate, NULL);
+
+   FREE(dec);
+}
+
+/* Allocate a decode buffer, install its hooks and initialise the state of each stage; NULL on failure. */
+static struct pipe_video_decode_buffer *
+vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
+   struct vl_mpeg12_buffer *buffer;
+   unsigned mb_width, mb_height;
+
+   assert(dec);
+
+   buffer = CALLOC_STRUCT(vl_mpeg12_buffer);
+   if (buffer == NULL)
+      return NULL;
+
+   mb_width = dec->base.width / MACROBLOCK_WIDTH;
+   mb_height = dec->base.height / MACROBLOCK_HEIGHT;
+
+   buffer->base.decoder = decoder;
+   buffer->base.destroy = vl_mpeg12_buffer_destroy;
+   buffer->base.begin_frame = vl_mpeg12_buffer_begin_frame;
+   buffer->base.set_quant_matrix = vl_mpeg12_buffer_set_quant_matrix;
+   buffer->base.get_ycbcr_stream = vl_mpeg12_buffer_get_ycbcr_stream;
+   buffer->base.get_ycbcr_buffer = vl_mpeg12_buffer_get_ycbcr_buffer;
+   buffer->base.get_mv_stream_stride = vl_mpeg12_buffer_get_mv_stream_stride;
+   buffer->base.get_mv_stream = vl_mpeg12_buffer_get_mv_stream;
+   buffer->base.decode_bitstream = vl_mpeg12_buffer_decode_bitstream;
+   buffer->base.end_frame = vl_mpeg12_buffer_end_frame;
+
+   if (!vl_vb_init(&buffer->vertex_stream, dec->base.context, mb_width, mb_height))
+      goto error_vertex_buffer;
+
+   if (!init_mc_buffer(buffer))
+      goto error_mc;
+
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT && !init_idct_buffer(buffer))
+      goto error_idct;
+
+   if (!init_zscan_buffer(buffer))
+      goto error_zscan;
+
+   if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
+      vl_mpg12_bs_init(&buffer->bs, mb_width, mb_height);
+
+   return &buffer->base;
+
+error_zscan:
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      cleanup_idct_buffer(buffer);
+
+error_idct:
+   cleanup_mc_buffer(buffer);
+
+error_mc:
+   vl_vb_cleanup(&buffer->vertex_stream);
+
+error_vertex_buffer:
+   FREE(buffer);
+   return NULL;
+}
+
+/*
+ * Flush the work queued in a decode buffer to dst: render motion compensation from each
+ * available reference, run zscan (plus the IDCT flush for entrypoints up to IDCT) per
+ * plane, then render the YCbCr blocks of every destination component.
+ */
+static void
+vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
+                               unsigned num_ycbcr_blocks[3],
+                               struct pipe_video_buffer *refs[2],
+                               struct pipe_video_buffer *dst)
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
+   struct vl_mpeg12_decoder *dec;
+   struct pipe_sampler_view **sv[VL_MAX_REF_FRAMES], **mc_source_sv;
+   struct pipe_surface **surfaces;
+   struct pipe_vertex_buffer vb[3];
+   unsigned i, j, component;
+   unsigned nr_components;
+
+   assert(buf);
+
+   dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
+   assert(dec);
+
+   /* refs[] has only two entries; any further sv[] slot is set to NULL instead of left uninitialised */
+   for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
+      sv[i] = (i < 2 && refs[i]) ? refs[i]->get_sampler_view_planes(refs[i]) : NULL;
+
+   vb[0] = dec->quads;
+   vb[1] = dec->pos;
+
+   surfaces = dst->get_surfaces(dst);
+
+   dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_mv);
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      if (!surfaces[i]) continue;
+      vl_mc_set_surface(&buf->mc[i], surfaces[i]);
+
+      for (j = 0; j < VL_MAX_REF_FRAMES; ++j) {
+         if (!sv[j]) continue;
+         vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);
+         dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
+
+         vl_mc_render_ref(&buf->mc[i], sv[j][i]);
+      }
+   }
+
+   vb[2] = dec->block_num;
+
+   dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_ycbcr);
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      if (!num_ycbcr_blocks[i]) continue;
+      vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
+      dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
+
+      vl_zscan_render(&buf->zscan[i] , num_ycbcr_blocks[i]);
+
+      if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+         vl_idct_flush(&buf->idct[i], num_ycbcr_blocks[i]);
+   }
+
+   mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
+   for (i = 0, component = 0; i < VL_MAX_PLANES; ++i) {
+      if (!surfaces[i]) continue;
+      nr_components = util_format_get_nr_components(surfaces[i]->texture->format);
+      for (j = 0; j < nr_components; ++j, ++component) {
+         /* the count rendered below is indexed by component, so the skip test must be too */
+         if (!num_ycbcr_blocks[component]) continue;
+
+         vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component);
+         dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
+
+         if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+            vl_idct_prepare_stage2(&buf->idct[component]);
+         else {
+            dec->base.context->set_fragment_sampler_views(dec->base.context, 1, &mc_source_sv[component]);
+            dec->base.context->bind_fragment_sampler_states(dec->base.context, 1, &dec->sampler_ycbcr);
+         }
+         vl_mc_render_ycbcr(&buf->mc[i], j, num_ycbcr_blocks[component]);
+      }
+   }
+}
+
+/* Create and bind an all-disabled depth/stencil/alpha state and create the sampler kept in
+ * dec->sampler_ycbcr; returns false if either state object cannot be created. */
+static bool
+init_pipe_state(struct vl_mpeg12_decoder *dec)
+{
+   struct pipe_depth_stencil_alpha_state dsa;
+   struct pipe_sampler_state sampler;
+   unsigned i;
+
+   assert(dec);
+   memset(&dsa, 0, sizeof dsa);
+   dsa.depth.enabled = 0;
+   dsa.depth.writemask = 0;
+   dsa.depth.func = PIPE_FUNC_ALWAYS;
+   for (i = 0; i < 2; ++i) {
+      dsa.stencil[i].enabled = 0;
+      dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
+      dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].valuemask = 0;
+      dsa.stencil[i].writemask = 0;
+   }
+   dsa.alpha.enabled = 0;
+   dsa.alpha.func = PIPE_FUNC_ALWAYS;
+   dsa.alpha.ref_value = 0;
+   dec->dsa = dec->base.context->create_depth_stencil_alpha_state(dec->base.context, &dsa);
+   if (!dec->dsa) /* checked like the sampler below instead of binding a failed state */
+      return false;
+   dec->base.context->bind_depth_stencil_alpha_state(dec->base.context, dec->dsa);
+
+   memset(&sampler, 0, sizeof(sampler));
+   sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+   sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+   sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+   sampler.compare_func = PIPE_FUNC_ALWAYS;
+   sampler.normalized_coords = 1;
+   dec->sampler_ycbcr = dec->base.context->create_sampler_state(dec->base.context, &sampler);
+   return dec->sampler_ycbcr != NULL;
+}
+
+/**
+ * Return the first entry of configs[] whose formats the screen supports.
+ *
+ * An entry qualifies when its zscan source format can be sampled as a 2D
+ * texture and its mc source format can be both sampled and rendered to.
+ * Entries with an IDCT source format additionally need that format to be
+ * sampleable and renderable as a 2D texture, and their mc source format is
+ * checked as a 3D texture instead of a 2D one.
+ *
+ * \return pointer into configs[], or NULL if no entry is usable
+ */
+static const struct format_config*
+find_format_config(struct vl_mpeg12_decoder *dec, const struct format_config configs[], unsigned num_configs)
+{
+   const unsigned source_bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   struct pipe_screen *screen;
+   unsigned idx;
+
+   assert(dec);
+
+   screen = dec->base.context->screen;
+
+   for (idx = 0; idx < num_configs; ++idx) {
+      const struct format_config *candidate = &configs[idx];
+      const bool has_idct = candidate->idct_source_format != PIPE_FORMAT_NONE;
+
+      if (!screen->is_format_supported(screen, candidate->zscan_source_format,
+                                       PIPE_TEXTURE_2D, 1, PIPE_BIND_SAMPLER_VIEW))
+         continue;
+
+      if (has_idct &&
+          !screen->is_format_supported(screen, candidate->idct_source_format,
+                                       PIPE_TEXTURE_2D, 1, source_bind))
+         continue;
+
+      /* the mc source is a 3D texture only when it is fed by the IDCT */
+      if (screen->is_format_supported(screen, candidate->mc_source_format,
+                                      has_idct ? PIPE_TEXTURE_3D : PIPE_TEXTURE_2D,
+                                      1, source_bind))
+         return candidate;
+   }
+
+   return NULL;
+}
+
+/**
+ * Set up the zscan stage: remember the source format, create the three scan
+ * order layouts and initialize the zscan objects for the full-size (zscan_y)
+ * and chroma-size (zscan_c) planes.
+ *
+ * \param dec            decoder being constructed
+ * \param format_config  selected format configuration
+ * \return true on success; on failure everything created here is released
+ *         again and false is returned
+ */
+static bool
+init_zscan(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
+{
+   unsigned num_channels;
+
+   assert(dec);
+
+   dec->zscan_source_format = format_config->zscan_source_format;
+   dec->zscan_linear = vl_zscan_layout(dec->base.context, vl_zscan_linear, dec->blocks_per_line);
+   dec->zscan_normal = vl_zscan_layout(dec->base.context, vl_zscan_normal, dec->blocks_per_line);
+   dec->zscan_alternate = vl_zscan_layout(dec->base.context, vl_zscan_alternate, dec->blocks_per_line);
+   if (!dec->zscan_linear || !dec->zscan_normal || !dec->zscan_alternate)
+      goto error_layout;
+
+   /* four channels per texel when the IDCT stage follows, otherwise one */
+   num_channels = dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 4 : 1;
+
+   if (!vl_zscan_init(&dec->zscan_y, dec->base.context, dec->base.width, dec->base.height,
+                      dec->blocks_per_line, dec->num_blocks, num_channels))
+      goto error_layout;
+
+   if (!vl_zscan_init(&dec->zscan_c, dec->base.context, dec->chroma_width, dec->chroma_height,
+                      dec->blocks_per_line, dec->num_blocks, num_channels))
+      goto error_zscan_c;
+
+   return true;
+
+error_zscan_c:
+   /* the caller does not clean up zscan objects when this function fails */
+   vl_zscan_cleanup(&dec->zscan_y);
+
+error_layout:
+   /* dropping a NULL reference is a no-op, so partially created sets are fine */
+   pipe_sampler_view_reference(&dec->zscan_linear, NULL);
+   pipe_sampler_view_reference(&dec->zscan_normal, NULL);
+   pipe_sampler_view_reference(&dec->zscan_alternate, NULL);
+   return false;
+}
+
+/**
+ * Set up the IDCT stage: create the IDCT source and MC source video buffers,
+ * upload the transformation matrix and initialize the idct_y and idct_c
+ * objects.
+ *
+ * \param dec            decoder being constructed
+ * \param format_config  supplies idct_source_format, mc_source_format and
+ *                       idct_scale
+ * \return true on success; on failure everything created here is destroyed
+ *         again and false is returned
+ */
+static bool
+init_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
+{
+   unsigned nr_of_idct_render_targets, max_inst;
+   enum pipe_format formats[3];
+
+   struct pipe_sampler_view *matrix = NULL;
+
+   nr_of_idct_render_targets = dec->base.context->screen->get_param
+   (
+      dec->base.context->screen, PIPE_CAP_MAX_RENDER_TARGETS
+   );
+   
+   max_inst = dec->base.context->screen->get_shader_param
+   (
+      dec->base.context->screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_INSTRUCTIONS
+   );
+
+   // Just assume we need 32 instructions per render target; not 100% true, but should work in most cases
+   if (nr_of_idct_render_targets >= 4 && max_inst >= 32*4)
+      // more than 4 render targets usually doesn't make any sense
+      nr_of_idct_render_targets = 4;
+   else
+      nr_of_idct_render_targets = 1;
+
+   /* all three planes of the IDCT source share one format; the buffer is
+    * created with a quarter of the decoder width */
+   formats[0] = formats[1] = formats[2] = format_config->idct_source_format;
+   dec->idct_source = vl_video_buffer_create_ex
+   (
+      dec->base.context, dec->base.width / 4, dec->base.height, 1,
+      dec->base.chroma_format, formats, PIPE_USAGE_STATIC
+   );
+
+   if (!dec->idct_source)
+      goto error_idct_source;
+
+   /* the MC source dimensions depend on the number of render targets chosen
+    * above; NOTE(review): the fourth argument is presumably the depth of the
+    * buffer - confirm against vl_video_buffer_create_ex() */
+   formats[0] = formats[1] = formats[2] = format_config->mc_source_format;
+   dec->mc_source = vl_video_buffer_create_ex
+   (
+      dec->base.context, dec->base.width / nr_of_idct_render_targets,
+      dec->base.height / 4, nr_of_idct_render_targets,
+      dec->base.chroma_format, formats, PIPE_USAGE_STATIC
+   );
+
+   if (!dec->mc_source)
+      goto error_mc_source;
+
+   if (!(matrix = vl_idct_upload_matrix(dec->base.context, format_config->idct_scale)))
+      goto error_matrix;
+
+   /* the same matrix view is passed for both matrix arguments */
+   if (!vl_idct_init(&dec->idct_y, dec->base.context, dec->base.width, dec->base.height,
+                     nr_of_idct_render_targets, matrix, matrix))
+      goto error_y;
+
+   if(!vl_idct_init(&dec->idct_c, dec->base.context, dec->chroma_width, dec->chroma_height,
+                    nr_of_idct_render_targets, matrix, matrix))
+      goto error_c;
+
+   /* drop the local reference to the matrix view */
+   pipe_sampler_view_reference(&matrix, NULL);
+
+   return true;
+
+   /* error unwinding: each label undoes the step that succeeded before it */
+error_c:
+   vl_idct_cleanup(&dec->idct_y);
+
+error_y:
+   pipe_sampler_view_reference(&matrix, NULL);
+
+error_matrix:
+   dec->mc_source->destroy(dec->mc_source);
+
+error_mc_source:
+   dec->idct_source->destroy(dec->idct_source);
+
+error_idct_source:
+   return false;
+}
+
+/**
+ * Create the MC source video buffer for the case where no IDCT stage is
+ * used: a buffer of the full decoder size with the configured mc source
+ * format on all three planes.
+ *
+ * \return true if the buffer could be created, false otherwise
+ */
+static bool
+init_mc_source_widthout_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
+{
+   enum pipe_format formats[3];
+   unsigned plane;
+
+   for (plane = 0; plane < 3; ++plane)
+      formats[plane] = format_config->mc_source_format;
+
+   dec->mc_source = vl_video_buffer_create_ex(dec->base.context,
+                                              dec->base.width, dec->base.height, 1,
+                                              dec->base.chroma_format, formats,
+                                              PIPE_USAGE_STATIC);
+   if (!dec->mc_source)
+      return false;
+
+   return true;
+}
+
+/**
+ * Vertex shader callback handed to vl_mc_init().
+ *
+ * For entrypoints up to and including IDCT the work is delegated to the
+ * second IDCT stage of the matching plane (idct_y for mc_y, idct_c
+ * otherwise). For the remaining entrypoints the xy components of tex are
+ * simply forwarded in the generic output first_output.
+ */
+static void
+mc_vert_shader_callback(void *priv, struct vl_mc *mc,
+                        struct ureg_program *shader,
+                        unsigned first_output,
+                        struct ureg_dst tex)
+{
+   struct vl_mpeg12_decoder *dec = priv;
+
+   assert(priv && mc);
+   assert(shader);
+
+   if (dec->base.entrypoint > PIPE_VIDEO_ENTRYPOINT_IDCT) {
+      /* no IDCT stage: pass the texture coordinate straight through */
+      struct ureg_dst o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output);
+
+      ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_XY), ureg_src(tex));
+      return;
+   }
+
+   vl_idct_stage2_vert_shader(mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c,
+                              shader, first_output, tex);
+}
+
+/**
+ * Fragment shader callback handed to vl_mc_init().
+ *
+ * For entrypoints up to and including IDCT the work is delegated to the
+ * second IDCT stage of the matching plane (idct_y for mc_y, idct_c
+ * otherwise). For the remaining entrypoints dst is fetched from sampler 0
+ * as a 2D texture, using the generic input first_input as coordinate.
+ */
+static void
+mc_frag_shader_callback(void *priv, struct vl_mc *mc,
+                        struct ureg_program *shader,
+                        unsigned first_input,
+                        struct ureg_dst dst)
+{
+   struct vl_mpeg12_decoder *dec = priv;
+
+   assert(priv && mc);
+   assert(shader);
+
+   if (dec->base.entrypoint > PIPE_VIDEO_ENTRYPOINT_IDCT) {
+      /* no IDCT stage: plain texture fetch from the MC source */
+      struct ureg_src coord, tex_unit;
+
+      coord = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input, TGSI_INTERPOLATE_LINEAR);
+      tex_unit = ureg_DECL_sampler(shader, 0);
+      ureg_TEX(shader, dst, TGSI_TEXTURE_2D, coord, tex_unit);
+      return;
+   }
+
+   vl_idct_stage2_frag_shader(mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c,
+                              shader, first_input, dst);
+}
+
+/**
+ * Create a shader based MPEG-1/2 decoder.
+ *
+ * Uploads the static vertex data, picks a format configuration supported by
+ * the screen for the requested entrypoint and initializes the zscan,
+ * optional IDCT and motion compensation stages plus the shared pipe state.
+ *
+ * \param context        pipe context all resources are created on
+ * \param profile        must reduce to PIPE_VIDEO_CODEC_MPEG12 (asserted)
+ * \param entrypoint     bitstream, IDCT or MC entrypoint
+ * \param chroma_format  only PIPE_VIDEO_CHROMA_FORMAT_420 is implemented
+ *                       (asserted)
+ * \param width          width of the decoder
+ * \param height         height of the decoder
+ * \return the new decoder, or NULL on failure; on failure everything
+ *         allocated by this function is released again
+ */
+struct pipe_video_decoder *
+vl_create_mpeg12_decoder(struct pipe_context *context,
+                         enum pipe_video_profile profile,
+                         enum pipe_video_entrypoint entrypoint,
+                         enum pipe_video_chroma_format chroma_format,
+                         unsigned width, unsigned height)
+{
+   const unsigned block_size_pixels = BLOCK_WIDTH * BLOCK_HEIGHT;
+   const struct format_config *format_config;
+   struct vl_mpeg12_decoder *dec;
+
+   assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
+
+   dec = CALLOC_STRUCT(vl_mpeg12_decoder);
+
+   if (!dec)
+      return NULL;
+
+   dec->base.context = context;
+   dec->base.profile = profile;
+   dec->base.entrypoint = entrypoint;
+   dec->base.chroma_format = chroma_format;
+   dec->base.width = width;
+   dec->base.height = height;
+
+   dec->base.destroy = vl_mpeg12_destroy;
+   dec->base.create_buffer = vl_mpeg12_create_buffer;
+   dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer;
+
+   dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4);
+   dec->num_blocks = (dec->base.width * dec->base.height) / block_size_pixels;
+
+   dec->quads = vl_vb_upload_quads(dec->base.context);
+   dec->pos = vl_vb_upload_pos(
+      dec->base.context,
+      dec->base.width / MACROBLOCK_WIDTH,
+      dec->base.height / MACROBLOCK_HEIGHT
+   );
+   dec->block_num = vl_vb_upload_block_num(dec->base.context, dec->num_blocks);
+   /* the upload helpers report failure through a NULL buffer */
+   if (!dec->quads.buffer || !dec->pos.buffer || !dec->block_num.buffer)
+      goto error_vertex_state;
+
+   dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->base.context);
+   dec->ves_mv = vl_vb_get_ves_mv(dec->base.context);
+   if (!dec->ves_ycbcr || !dec->ves_mv)
+      goto error_vertex_state;
+
+   /* TODO: Implement 422, 444 */
+   assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
+
+   if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
+      dec->chroma_width = dec->base.width / 2;
+      dec->chroma_height = dec->base.height / 2;
+   } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
+      /* NOTE(review): 4:2:2 is normally subsampled horizontally, not
+       * vertically - confirm these dimensions before enabling 422 */
+      dec->chroma_width = dec->base.width;
+      dec->chroma_height = dec->base.height / 2;
+   } else {
+      dec->chroma_width = dec->base.width;
+      dec->chroma_height = dec->base.height;
+   }
+
+   switch (entrypoint) {
+   case PIPE_VIDEO_ENTRYPOINT_BITSTREAM:
+      format_config = find_format_config(dec, bitstream_format_config, num_bitstream_format_configs);
+      break;
+
+   case PIPE_VIDEO_ENTRYPOINT_IDCT:
+      format_config = find_format_config(dec, idct_format_config, num_idct_format_configs);
+      break;
+
+   case PIPE_VIDEO_ENTRYPOINT_MC:
+      format_config = find_format_config(dec, mc_format_config, num_mc_format_configs);
+      break;
+
+   default:
+      assert(0);
+      /* unwind instead of returning directly, which would leak dec */
+      goto error_vertex_state;
+   }
+
+   if (!format_config)
+      goto error_vertex_state;
+
+   if (!init_zscan(dec, format_config))
+      goto error_zscan;
+
+   if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
+      if (!init_idct(dec, format_config))
+         goto error_sources;
+   } else {
+      if (!init_mc_source_widthout_idct(dec, format_config))
+         goto error_sources;
+   }
+
+   if (!vl_mc_init(&dec->mc_y, dec->base.context, dec->base.width, dec->base.height,
+                   MACROBLOCK_HEIGHT, format_config->mc_scale,
+                   mc_vert_shader_callback, mc_frag_shader_callback, dec))
+      goto error_mc_y;
+
+   // TODO
+   if (!vl_mc_init(&dec->mc_c, dec->base.context, dec->base.width, dec->base.height,
+                   BLOCK_HEIGHT, format_config->mc_scale,
+                   mc_vert_shader_callback, mc_frag_shader_callback, dec))
+      goto error_mc_c;
+
+   if (!init_pipe_state(dec))
+      goto error_pipe_state;
+
+   return &dec->base;
+
+error_pipe_state:
+   vl_mc_cleanup(&dec->mc_c);
+
+error_mc_c:
+   vl_mc_cleanup(&dec->mc_y);
+
+error_mc_y:
+   if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
+      vl_idct_cleanup(&dec->idct_y);
+      vl_idct_cleanup(&dec->idct_c);
+      dec->idct_source->destroy(dec->idct_source);
+   }
+   dec->mc_source->destroy(dec->mc_source);
+
+error_sources:
+   vl_zscan_cleanup(&dec->zscan_y);
+   vl_zscan_cleanup(&dec->zscan_c);
+
+error_zscan:
+   /* dec is zero-allocated, so views that were never created are NULL and
+    * dropping them is a no-op */
+   pipe_sampler_view_reference(&dec->zscan_linear, NULL);
+   pipe_sampler_view_reference(&dec->zscan_normal, NULL);
+   pipe_sampler_view_reference(&dec->zscan_alternate, NULL);
+
+error_vertex_state:
+   if (dec->ves_mv)
+      dec->base.context->delete_vertex_elements_state(dec->base.context, dec->ves_mv);
+   if (dec->ves_ycbcr)
+      dec->base.context->delete_vertex_elements_state(dec->base.context, dec->ves_ycbcr);
+
+   pipe_resource_reference(&dec->block_num.buffer, NULL);
+   pipe_resource_reference(&dec->pos.buffer, NULL);
+   pipe_resource_reference(&dec->quads.buffer, NULL);
+
+   FREE(dec);
+   return NULL;
+}
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
new file mode 100644
index 00000000000..01265e368a3
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -0,0 +1,105 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_mpeg12_decoder_h
+#define vl_mpeg12_decoder_h
+
+#include <pipe/p_video_decoder.h>
+
+#include "vl_mpeg12_bitstream.h"
+#include "vl_zscan.h"
+#include "vl_idct.h"
+#include "vl_mc.h"
+
+#include "vl_vertex_buffers.h"
+#include "vl_video_buffer.h"
+
+struct pipe_screen;
+struct pipe_context;
+
+/**
+ * State of a shader based MPEG-1/2 decoder, created by
+ * vl_create_mpeg12_decoder().
+ */
+struct vl_mpeg12_decoder
+{
+   /* generic decoder interface; must stay the first member because the
+    * create function hands out a pointer to it */
+   struct pipe_video_decoder base;
+
+   /* size of the chroma planes, derived from base.width/height and
+    * base.chroma_format */
+   unsigned chroma_width, chroma_height;
+
+   /* block layout parameters handed to the zscan stage */
+   unsigned blocks_per_line;
+   unsigned num_blocks;
+
+   /* copied from the selected format configuration */
+   enum pipe_format zscan_source_format;
+
+   /* static vertex buffers uploaded once at creation time */
+   struct pipe_vertex_buffer quads;
+   struct pipe_vertex_buffer pos;
+   struct pipe_vertex_buffer block_num;
+
+   /* vertex element state objects for YCbCr block and motion vector rendering */
+   void *ves_ycbcr;
+   void *ves_mv;
+
+   /* sampler state used when sampling the MC source */
+   void *sampler_ycbcr;
+
+   /* one layout view per scan order, created by vl_zscan_layout() */
+   struct pipe_sampler_view *zscan_linear;
+   struct pipe_sampler_view *zscan_normal;
+   struct pipe_sampler_view *zscan_alternate;
+
+   /* intermediate video buffers; idct_source is only created for
+    * entrypoints up to and including IDCT */
+   struct pipe_video_buffer *idct_source;
+   struct pipe_video_buffer *mc_source;
+
+   /* per stage objects: *_y is set up with the full decoder size, *_c is
+    * the chroma counterpart */
+   struct vl_zscan zscan_y, zscan_c;
+   struct vl_idct idct_y, idct_c;
+   struct vl_mc mc_y, mc_c;
+
+   /* depth/stencil/alpha state object with all tests disabled */
+   void *dsa;
+};
+
+/**
+ * Per decode buffer state of the MPEG-1/2 decoder.
+ */
+struct vl_mpeg12_buffer
+{
+   /* generic decode buffer interface */
+   struct pipe_video_decode_buffer base;
+
+   /* vertex buffers holding the YCbCr block and motion vector streams */
+   struct vl_vertex_buffer vertex_stream;
+
+   /* NOTE(review): presumably the video buffer the zscan stage reads the
+    * coefficients from - confirm against the buffer creation code */
+   struct pipe_video_buffer *zscan_source;
+
+   /* NOTE(review): presumably the bitstream parser state - confirm */
+   struct vl_mpg12_bs bs;
+
+   /* per plane state of the zscan, IDCT and motion compensation stages */
+   struct vl_zscan_buffer zscan[VL_MAX_PLANES];
+   struct vl_idct_buffer idct[VL_MAX_PLANES];
+   struct vl_mc_buffer mc[VL_MAX_PLANES];
+
+   /* NOTE(review): presumably the transfer and mapped pointer of each
+    * plane's source texture while it is mapped - confirm */
+   struct pipe_transfer *tex_transfer[VL_MAX_PLANES];
+   short *texels[VL_MAX_PLANES];
+};
+
+/**
+ * Creates a shader based MPEG-1/2 decoder.
+ *
+ * \param pipe           context the decoder creates its resources on
+ * \param profile        video profile, must be an MPEG-1/2 profile
+ * \param entrypoint     entrypoint at which decoding starts
+ * \param chroma_format  chroma subsampling of the video
+ * \param width          width of the decoder
+ * \param height         height of the decoder
+ * \return the new decoder, or NULL if it could not be created
+ */
+struct pipe_video_decoder *
+vl_create_mpeg12_decoder(struct pipe_context *pipe,
+                         enum pipe_video_profile profile,
+                         enum pipe_video_entrypoint entrypoint,
+                         enum pipe_video_chroma_format chroma_format,
+                         unsigned width, unsigned height);
+
+#endif /* vl_mpeg12_decoder_h */
diff --git a/src/gallium/auxiliary/vl/vl_types.h b/src/gallium/auxiliary/vl/vl_types.h
new file mode 100644
index 00000000000..27bb69d67bc
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_types.h
@@ -0,0 +1,51 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_types_h
+#define vl_types_h
+
+/* two component vector of floats */
+struct vertex2f
+{
+   float x, y;
+};
+
+/* two component vector of shorts */
+struct vertex2s
+{
+   short x, y;
+};
+
+/* four component vector of floats */
+struct vertex4f
+{
+   float x, y, z, w;
+};
+
+/* four component vector of shorts */
+struct vertex4s
+{
+   short x, y, z, w;
+};
+
+#endif /* vl_types_h */
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
new file mode 100644
index 00000000000..c0f1449bf80
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -0,0 +1,419 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+#include <util/u_format.h>
+#include "vl_vertex_buffers.h"
+#include "vl_types.h"
+
+/* vertices for a quad covering a block: the four corners of the unit
+ * square, uploaded into a vertex buffer by vl_vb_upload_quads() */
+static const struct vertex2f block_quad[4] = {
+   {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
+};
+
+/**
+ * Create a static vertex buffer holding the four corners of block_quad.
+ *
+ * \param pipe  context used to create and fill the buffer
+ * \return vertex buffer description; its buffer member is NULL if the
+ *         buffer could not be created or mapped
+ */
+struct pipe_vertex_buffer
+vl_vb_upload_quads(struct pipe_context *pipe)
+{
+   struct pipe_vertex_buffer quad;
+   struct pipe_transfer *buf_transfer;
+   struct vertex2f *v;
+
+   unsigned i;
+
+   assert(pipe);
+
+   /* the struct is returned by value, so leave no member undefined */
+   memset(&quad, 0, sizeof(quad));
+
+   /* create buffer */
+   quad.stride = sizeof(struct vertex2f);
+   quad.buffer_offset = 0;
+   quad.buffer = pipe_buffer_create
+   (
+      pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      PIPE_USAGE_STATIC,
+      sizeof(struct vertex2f) * 4
+   );
+
+   if(!quad.buffer)
+      return quad;
+
+   /* and fill it */
+   v = pipe_buffer_map
+   (
+      pipe,
+      quad.buffer,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer
+   );
+
+   if (!v) {
+      /* report a failed mapping the same way as a failed allocation */
+      pipe_resource_reference(&quad.buffer, NULL);
+      return quad;
+   }
+
+   for (i = 0; i < 4; ++i, ++v) {
+      v->x = block_quad[i].x;
+      v->y = block_quad[i].y;
+   }
+
+   pipe_buffer_unmap(pipe, buf_transfer);
+
+   return quad;
+}
+
+/**
+ * Create a static vertex buffer holding one (x, y) position per cell of a
+ * width x height grid, stored row by row.
+ *
+ * \param pipe    context used to create and fill the buffer
+ * \param width   number of grid columns
+ * \param height  number of grid rows
+ * \return vertex buffer description; its buffer member is NULL if the
+ *         buffer could not be created or mapped
+ */
+struct pipe_vertex_buffer
+vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height)
+{
+   struct pipe_vertex_buffer pos;
+   struct pipe_transfer *buf_transfer;
+   struct vertex2s *v;
+
+   unsigned x, y;
+
+   assert(pipe);
+
+   /* the struct is returned by value, so leave no member undefined */
+   memset(&pos, 0, sizeof(pos));
+
+   /* create buffer */
+   pos.stride = sizeof(struct vertex2s);
+   pos.buffer_offset = 0;
+   pos.buffer = pipe_buffer_create
+   (
+      pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      PIPE_USAGE_STATIC,
+      sizeof(struct vertex2s) * width * height
+   );
+
+   if(!pos.buffer)
+      return pos;
+
+   /* and fill it */
+   v = pipe_buffer_map
+   (
+      pipe,
+      pos.buffer,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer
+   );
+
+   if (!v) {
+      /* report a failed mapping the same way as a failed allocation */
+      pipe_resource_reference(&pos.buffer, NULL);
+      return pos;
+   }
+
+   for ( y = 0; y < height; ++y) {
+      for ( x = 0; x < width; ++x, ++v) {
+         v->x = x;
+         v->y = y;
+      }
+   }
+
+   pipe_buffer_unmap(pipe, buf_transfer);
+
+   return pos;
+}
+
+/**
+ * Create a static vertex buffer holding num_blocks entries, where entry i
+ * stores the value i in both of its components.
+ *
+ * \param pipe        context used to create and fill the buffer
+ * \param num_blocks  number of entries to generate
+ * \return vertex buffer description; its buffer member is NULL if the
+ *         buffer could not be created or mapped
+ */
+struct pipe_vertex_buffer
+vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks)
+{
+   struct pipe_vertex_buffer buf;
+   struct pipe_transfer *buf_transfer;
+   struct vertex2s *v;
+   unsigned i;
+
+   assert(pipe);
+
+   /* the struct is returned by value, so leave no member undefined */
+   memset(&buf, 0, sizeof(buf));
+
+   /* create buffer */
+   buf.stride = sizeof(struct vertex2s);
+   buf.buffer_offset = 0;
+   buf.buffer = pipe_buffer_create
+   (
+      pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      PIPE_USAGE_STATIC,
+      sizeof(struct vertex2s) * num_blocks
+   );
+
+   if(!buf.buffer)
+      return buf;
+
+   /* and fill it */
+   v = pipe_buffer_map
+   (
+      pipe,
+      buf.buffer,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer
+   );
+
+   if (!v) {
+      /* report a failed mapping the same way as a failed allocation */
+      pipe_resource_reference(&buf.buffer, NULL);
+      return buf;
+   }
+
+   for ( i = 0; i < num_blocks; ++i, ++v) {
+      v->x = i;
+      v->y = i;
+   }
+
+   pipe_buffer_unmap(pipe, buf_transfer);
+
+   return buf;
+}
+
+/**
+ * Describe the vertex element that reads the block quad corners: two floats
+ * at offset 0 of vertex buffer 0, advanced per vertex.
+ */
+static struct pipe_vertex_element
+vl_vb_get_quad_vertex_element(void)
+{
+   struct pipe_vertex_element rect;
+
+   /* two floats per corner, taken from the first vertex buffer */
+   rect.src_format = PIPE_FORMAT_R32G32_FLOAT;
+   rect.vertex_buffer_index = 0;
+   rect.src_offset = 0;
+   /* a divisor of 0 means the element is not instanced */
+   rect.instance_divisor = 0;
+
+   return rect;
+}
+
+/**
+ * Fill in offset, instance divisor and buffer index for a run of vertex
+ * elements that are packed back to back in one vertex buffer.
+ *
+ * The src_format of every element must already be set, since it determines
+ * the offset of the element that follows.
+ *
+ * \param elements             first element of the run
+ * \param num_elements         number of elements in the run (non-zero)
+ * \param vertex_buffer_index  vertex buffer all elements read from
+ */
+static void
+vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements,
+                     unsigned vertex_buffer_index)
+{
+   struct pipe_vertex_element *elem, *end;
+   unsigned running_offset = 0;
+
+   assert(elements && num_elements);
+
+   end = elements + num_elements;
+   for (elem = elements; elem != end; ++elem) {
+      elem->src_offset = running_offset;
+      /* advance once per instance */
+      elem->instance_divisor = 1;
+      elem->vertex_buffer_index = vertex_buffer_index;
+      running_offset += util_format_get_blocksize(elem->src_format);
+   }
+}
+
+/**
+ * Create the vertex element state used for rendering YCbCr blocks: the quad
+ * corner from buffer 0, the block position from buffer 1 and the block
+ * number from buffer 2.
+ *
+ * \return state object as returned by create_vertex_elements_state()
+ */
+void *
+vl_vb_get_ves_ycbcr(struct pipe_context *pipe)
+{
+   struct pipe_vertex_element elems[NUM_VS_INPUTS];
+
+   assert(pipe);
+
+   memset(&elems, 0, sizeof(elems));
+
+   /* per instance inputs: only the formats are set here, the helper fills
+    * in offset, divisor and buffer index */
+   elems[VS_I_VPOS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
+   elems[VS_I_BLOCK_NUM].src_format = PIPE_FORMAT_R16G16_SSCALED;
+
+   /* per vertex input: the quad corner */
+   elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
+
+   vl_vb_element_helper(&elems[VS_I_VPOS], 1, 1);
+   vl_vb_element_helper(&elems[VS_I_BLOCK_NUM], 1, 2);
+
+   /* only the first three inputs are used by this layout */
+   return pipe->create_vertex_elements_state(pipe, 3, elems);
+}
+
+/**
+ * Create the vertex element state used for rendering motion vectors: the
+ * quad corner from buffer 0, the position from buffer 1 and the top and
+ * bottom motion vectors packed back to back in buffer 2.
+ *
+ * \return state object as returned by create_vertex_elements_state()
+ */
+void *
+vl_vb_get_ves_mv(struct pipe_context *pipe)
+{
+   struct pipe_vertex_element elems[NUM_VS_INPUTS];
+
+   assert(pipe);
+
+   memset(&elems, 0, sizeof(elems));
+
+   /* per instance inputs: only the formats are set here, the helper fills
+    * in offset, divisor and buffer index */
+   elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
+   elems[VS_I_MV_TOP].src_format = PIPE_FORMAT_R16G16B16A16_SSCALED;
+   elems[VS_I_MV_BOTTOM].src_format = PIPE_FORMAT_R16G16B16A16_SSCALED;
+
+   /* per vertex input: the quad corner */
+   elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
+
+   vl_vb_element_helper(&elems[VS_I_VPOS], 1, 1);
+   /* top and bottom vector are consecutive elements of the same buffer */
+   vl_vb_element_helper(&elems[VS_I_MV_TOP], 2, 2);
+
+   return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS, elems);
+}
+
+/**
+ * Allocate the streaming vertex buffers of a vl_vertex_buffer: one YCbCr
+ * block buffer per plane and one motion vector buffer per reference frame.
+ * On success all buffers are left mapped through vl_vb_map().
+ *
+ * \param buffer  vertex buffer set to initialize
+ * \param pipe    context used to create and map the buffers
+ * \param width   width used to size the buffers
+ * \param height  height used to size the buffers
+ * \return true on success; false if an allocation failed, in which case
+ *         every buffer created so far is released again
+ */
+bool
+vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe,
+           unsigned width, unsigned height)
+{
+   unsigned i, size;
+
+   assert(buffer);
+
+   buffer->width = width;
+   buffer->height = height;
+
+   size = width * height;
+
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      buffer->ycbcr[i].resource = pipe_buffer_create
+      (
+         pipe->screen,
+         PIPE_BIND_VERTEX_BUFFER,
+         PIPE_USAGE_STREAM,
+         sizeof(struct pipe_ycbcr_block) * size * 4
+      );
+      if (!buffer->ycbcr[i].resource)
+         goto error_ycbcr;
+   }
+
+   for (i = 0; i < VL_MAX_REF_FRAMES; ++i) {
+      buffer->mv[i].resource = pipe_buffer_create
+      (
+         pipe->screen,
+         PIPE_BIND_VERTEX_BUFFER,
+         PIPE_USAGE_STREAM,
+         sizeof(struct pipe_motionvector) * size
+      );
+      if (!buffer->mv[i].resource)
+         goto error_mv;
+   }
+
+   vl_vb_map(buffer, pipe);
+   return true;
+
+error_mv:
+   /* i is the index of the motion vector buffer that failed; release only
+    * the ones created before it, staying inside the mv[] array */
+   while (i-- > 0)
+      pipe_resource_reference(&buffer->mv[i].resource, NULL);
+
+   /* all YCbCr buffers exist when a motion vector buffer fails */
+   i = VL_MAX_PLANES;
+
+error_ycbcr:
+   /* release only the slots assigned above, the rest may be uninitialized */
+   while (i-- > 0)
+      pipe_resource_reference(&buffer->ycbcr[i].resource, NULL);
+   return false;
+}
+
+/**
+ * Number of vertex attributes per block; currently the constant 1, the
+ * buffer argument is not looked at.
+ */
+unsigned
+vl_vb_attributes_per_plock(struct vl_vertex_buffer *buffer)
+{
+   return 1;
+}
+
+/**
+ * Build the vertex buffer description for the YCbCr block stream of one
+ * plane.
+ *
+ * \param buffer     vertex buffer set to read from
+ * \param component  plane index, must be below VL_MAX_PLANES
+ * \return description with a stride of one pipe_ycbcr_block and offset 0
+ */
+struct pipe_vertex_buffer
+vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component)
+{
+   struct pipe_vertex_buffer buf;
+
+   assert(buffer);
+   /* same bounds check as vl_vb_get_ycbcr_stream() */
+   assert(component >= 0 && component < VL_MAX_PLANES);
+
+   /* the struct is returned by value, so leave no member undefined */
+   memset(&buf, 0, sizeof(buf));
+
+   buf.stride = sizeof(struct pipe_ycbcr_block);
+   buf.buffer_offset = 0;
+   buf.buffer = buffer->ycbcr[component].resource;
+
+   return buf;
+}
+
+/**
+ * Build the vertex buffer description for the motion vector stream of one
+ * reference frame.
+ *
+ * \param buffer        vertex buffer set to read from
+ * \param motionvector  reference frame index, must be below
+ *                      VL_MAX_REF_FRAMES
+ * \return description with a stride of one pipe_motionvector and offset 0
+ */
+struct pipe_vertex_buffer
+vl_vb_get_mv(struct vl_vertex_buffer *buffer, int motionvector)
+{
+   struct pipe_vertex_buffer buf;
+
+   assert(buffer);
+   /* same bounds check as vl_vb_get_mv_stream() */
+   assert(motionvector >= 0 && motionvector < VL_MAX_REF_FRAMES);
+
+   /* the struct is returned by value, so leave no member undefined */
+   memset(&buf, 0, sizeof(buf));
+
+   buf.stride = sizeof(struct pipe_motionvector);
+   buf.buffer_offset = 0;
+   buf.buffer = buffer->mv[motionvector].resource;
+
+   return buf;
+}
+
+/**
+ * Map every YCbCr and motion vector buffer for writing, discarding the
+ * previous contents. The mapped pointers are stored in the vertex_stream
+ * members and the transfers in the transfer members.
+ */
+void
+vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
+{
+   const unsigned usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD;
+   unsigned plane, ref;
+
+   assert(buffer && pipe);
+
+   for (plane = 0; plane < VL_MAX_PLANES; ++plane)
+      buffer->ycbcr[plane].vertex_stream =
+         pipe_buffer_map(pipe, buffer->ycbcr[plane].resource, usage,
+                         &buffer->ycbcr[plane].transfer);
+
+   for (ref = 0; ref < VL_MAX_REF_FRAMES; ++ref)
+      buffer->mv[ref].vertex_stream =
+         pipe_buffer_map(pipe, buffer->mv[ref].resource, usage,
+                         &buffer->mv[ref].transfer);
+}
+
+/**
+ * Return the mapped YCbCr block stream of the given plane, as stored by
+ * vl_vb_map().
+ */
+struct pipe_ycbcr_block *
+vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component)
+{
+   struct pipe_ycbcr_block *stream;
+
+   assert(buffer);
+   assert(component < VL_MAX_PLANES);
+
+   stream = buffer->ycbcr[component].vertex_stream;
+   return stream;
+}
+
+/**
+ * Return the stride of the motion vector streams, which is the width the
+ * vertex buffer set was initialized with.
+ */
+unsigned
+vl_vb_get_mv_stream_stride(struct vl_vertex_buffer *buffer)
+{
+   unsigned stride;
+
+   assert(buffer);
+
+   stride = buffer->width;
+   return stride;
+}
+
+/**
+ * Return the mapped motion vector stream of the given reference frame, as
+ * stored by vl_vb_map().
+ */
+struct pipe_motionvector *
+vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame)
+{
+   struct pipe_motionvector *stream;
+
+   assert(buffer);
+   assert(ref_frame < VL_MAX_REF_FRAMES);
+
+   stream = buffer->mv[ref_frame].vertex_stream;
+   return stream;
+}
+
+/**
+ * Unmap every YCbCr and motion vector buffer through the transfers stored
+ * in the vertex buffer set.
+ */
+void
+vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
+{
+   unsigned plane, ref;
+
+   assert(buffer && pipe);
+
+   for (plane = 0; plane < VL_MAX_PLANES; ++plane)
+      pipe_buffer_unmap(pipe, buffer->ycbcr[plane].transfer);
+
+   for (ref = 0; ref < VL_MAX_REF_FRAMES; ++ref)
+      pipe_buffer_unmap(pipe, buffer->mv[ref].transfer);
+}
+
+/**
+ * Drop the references to every YCbCr and motion vector buffer of the
+ * vertex buffer set.
+ */
+void
+vl_vb_cleanup(struct vl_vertex_buffer *buffer)
+{
+   unsigned plane, ref;
+
+   assert(buffer);
+
+   for (plane = 0; plane < VL_MAX_PLANES; ++plane)
+      pipe_resource_reference(&buffer->ycbcr[plane].resource, NULL);
+
+   for (ref = 0; ref < VL_MAX_REF_FRAMES; ++ref)
+      pipe_resource_reference(&buffer->mv[ref].resource, NULL);
+}
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
new file mode 100644
index 00000000000..74845a42b69
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -0,0 +1,104 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#ifndef vl_vertex_buffers_h
+#define vl_vertex_buffers_h
+
+#include <pipe/p_state.h>
+#include <pipe/p_video_state.h>
+
+#include "vl_defines.h"
+#include "vl_types.h"
+
+/* vertex buffers act as a todo list,
+ * uploading all the useful information to video RAM
+ * so a vertex shader can work with it
+ */
+
+/* inputs to the vertex shaders
+ *
+ * Two layouts share these indices: the YCbCr layout uses RECT, VPOS and
+ * BLOCK_NUM, the motion vector layout uses RECT, VPOS, MV_TOP and
+ * MV_BOTTOM. That is why VS_I_BLOCK_NUM and VS_I_MV_TOP have the same value.
+ */
+enum VS_INPUT
+{
+   /* quad corner and position, common to both layouts */
+   VS_I_RECT = 0,
+   VS_I_VPOS = 1,
+
+   /* YCbCr layout only */
+   VS_I_BLOCK_NUM = 2,
+
+   /* motion vector layout only */
+   VS_I_MV_TOP = 2,
+   VS_I_MV_BOTTOM = 3,
+
+   /* size of the larger of the two layouts */
+   NUM_VS_INPUTS = 4
+};
+
+/**
+ * Set of streaming vertex buffers: one YCbCr block stream per plane and
+ * one motion vector stream per reference frame.
+ */
+struct vl_vertex_buffer
+{
+   /* dimensions passed to vl_vb_init(); width doubles as the stride of the
+    * motion vector streams */
+   unsigned width, height;
+
+   /* per plane: the buffer, its transfer while mapped and the mapped
+    * pointer */
+   struct {
+      struct pipe_resource    *resource;
+      struct pipe_transfer    *transfer;
+      struct pipe_ycbcr_block *vertex_stream;
+   } ycbcr[VL_MAX_PLANES];
+
+   /* per reference frame: the buffer, its transfer while mapped and the
+    * mapped pointer */
+   struct {
+      struct pipe_resource     *resource;
+      struct pipe_transfer     *transfer;
+      struct pipe_motionvector *vertex_stream;
+   } mv[VL_MAX_REF_FRAMES];
+};
+
+struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe);
+
+struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height);
+
+struct pipe_vertex_buffer vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks);
+
+void *vl_vb_get_ves_ycbcr(struct pipe_context *pipe);
+
+void *vl_vb_get_ves_mv(struct pipe_context *pipe);
+
+bool vl_vb_init(struct vl_vertex_buffer *buffer,
+                struct pipe_context *pipe,
+                unsigned width, unsigned height);
+
+unsigned vl_vb_attributes_per_plock(struct vl_vertex_buffer *buffer);
+
+void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
+
+struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component);
+
+struct pipe_ycbcr_block *vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component);
+
+struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int ref_frame);
+
+unsigned vl_vb_get_mv_stream_stride(struct vl_vertex_buffer *buffer);
+
+struct pipe_motionvector *vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame);
+
+void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
+
+void vl_vb_cleanup(struct vl_vertex_buffer *buffer);
+
+#endif /* vl_vertex_buffers_h */
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c
new file mode 100644
index 00000000000..4d8b6649dd2
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -0,0 +1,340 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+
+#include <pipe/p_screen.h>
+#include <pipe/p_context.h>
+#include <pipe/p_state.h>
+
+#include <util/u_format.h>
+#include <util/u_inlines.h>
+#include <util/u_sampler.h>
+#include <util/u_memory.h>
+
+#include "vl_video_buffer.h"
+
+/* per-plane resource formats for PIPE_FORMAT_YV12: three single-channel
+ * 8 bit planes */
+const enum pipe_format const_resource_formats_YV12[3] = {
+   PIPE_FORMAT_R8_UNORM,
+   PIPE_FORMAT_R8_UNORM,
+   PIPE_FORMAT_R8_UNORM
+};
+
+/* per-plane resource formats for PIPE_FORMAT_NV12: one single-channel
+ * plane, one two-channel plane, and PIPE_FORMAT_NONE marking the unused
+ * third slot */
+const enum pipe_format const_resource_formats_NV12[3] = {
+   PIPE_FORMAT_R8_UNORM,
+   PIPE_FORMAT_R8G8_UNORM,
+   PIPE_FORMAT_NONE
+};
+
+/**
+ * Map a video buffer format to its table of per-plane resource formats.
+ *
+ * Only PIPE_FORMAT_YV12 and PIPE_FORMAT_NV12 are known; every other format
+ * yields NULL. The screen argument is not consulted.
+ */
+const enum pipe_format *
+vl_video_buffer_formats(struct pipe_screen *screen, enum pipe_format format)
+{
+   if (format == PIPE_FORMAT_YV12)
+      return const_resource_formats_YV12;
+
+   if (format == PIPE_FORMAT_NV12)
+      return const_resource_formats_NV12;
+
+   return NULL;
+}
+
+/**
+ * Check whether every plane of a video buffer format can be created on
+ * this screen.
+ *
+ * Returns false when the format has no plane table, or when the screen
+ * rejects any used plane format as a 2D texture. Slots holding
+ * PIPE_FORMAT_NONE are skipped. The profile argument is not consulted.
+ */
+boolean
+vl_video_buffer_is_format_supported(struct pipe_screen *screen,
+                                    enum pipe_format format,
+                                    enum pipe_video_profile profile)
+{
+   /* the last argument of is_format_supported is a PIPE_BIND_* mask, not a
+    * PIPE_USAGE_* value; ask for the bindings the planes are created with
+    * in vl_video_buffer_create_ex */
+   const unsigned bindings = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   const enum pipe_format *resource_formats;
+   unsigned i;
+
+   resource_formats = vl_video_buffer_formats(screen, format);
+   if (!resource_formats)
+      return false;
+
+   for(i = 0; i < VL_MAX_PLANES; ++i) {
+      if (!resource_formats[i])
+         continue;
+
+      if (!screen->is_format_supported(screen, resource_formats[i], PIPE_TEXTURE_2D, 0, bindings))
+         return false;
+   }
+
+   return true;
+}
+
+/**
+ * Largest video buffer edge length, derived from the screen's
+ * PIPE_CAP_MAX_TEXTURE_2D_LEVELS value as 2^(levels - 1).
+ */
+unsigned
+vl_video_buffer_max_size(struct pipe_screen *screen)
+{
+   uint32_t levels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
+
+   return 1 << (levels - 1);
+}
+
+/**
+ * pipe_video_buffer::destroy implementation: drops every surface, sampler
+ * view and resource reference held in the plane slots, then frees the
+ * buffer structure itself.
+ */
+static void
+vl_video_buffer_destroy(struct pipe_video_buffer *buffer)
+{
+   struct vl_video_buffer *vbuf = (struct vl_video_buffer *)buffer;
+   unsigned plane = 0;
+
+   assert(vbuf);
+
+   while (plane < VL_MAX_PLANES) {
+      pipe_surface_reference(&vbuf->surfaces[plane], NULL);
+      pipe_sampler_view_reference(&vbuf->sampler_view_planes[plane], NULL);
+      pipe_sampler_view_reference(&vbuf->sampler_view_components[plane], NULL);
+      pipe_resource_reference(&vbuf->resources[plane], NULL);
+      ++plane;
+   }
+
+   FREE(buffer);
+}
+
+/**
+ * Return the array of per-plane sampler views, creating any that do not
+ * exist yet.
+ *
+ * Single-component plane formats get all four swizzles set to red. If a
+ * view cannot be created, the views of all used planes are released and
+ * NULL is returned.
+ */
+static struct pipe_sampler_view **
+vl_video_buffer_sampler_view_planes(struct pipe_video_buffer *buffer)
+{
+   struct vl_video_buffer *vbuf = (struct vl_video_buffer *)buffer;
+   struct pipe_sampler_view templ;
+   struct pipe_context *ctx;
+   unsigned plane;
+
+   assert(vbuf);
+
+   ctx = vbuf->base.context;
+
+   for (plane = 0; plane < vbuf->num_planes; ++plane) {
+      struct pipe_resource *res;
+
+      /* already created by an earlier call */
+      if (vbuf->sampler_view_planes[plane])
+         continue;
+
+      res = vbuf->resources[plane];
+
+      memset(&templ, 0, sizeof(templ));
+      u_sampler_view_default_template(&templ, res, res->format);
+
+      if (util_format_get_nr_components(res->format) == 1) {
+         templ.swizzle_a = PIPE_SWIZZLE_RED;
+         templ.swizzle_b = PIPE_SWIZZLE_RED;
+         templ.swizzle_g = PIPE_SWIZZLE_RED;
+         templ.swizzle_r = PIPE_SWIZZLE_RED;
+      }
+
+      vbuf->sampler_view_planes[plane] = ctx->create_sampler_view(ctx, res, &templ);
+      if (!vbuf->sampler_view_planes[plane]) {
+         /* unwind: drop every plane view, including earlier ones */
+         for (plane = 0; plane < vbuf->num_planes; ++plane)
+            pipe_sampler_view_reference(&vbuf->sampler_view_planes[plane], NULL);
+
+         return NULL;
+      }
+   }
+
+   return vbuf->sampler_view_planes;
+}
+
+/**
+ * Return the array of per-component sampler views, creating any that do
+ * not exist yet.
+ *
+ * Components are numbered consecutively across planes. The view for
+ * component j of a plane replicates that channel into r, g and b and
+ * forces alpha to one. If a view cannot be created, all component views
+ * are released and NULL is returned.
+ */
+static struct pipe_sampler_view **
+vl_video_buffer_sampler_view_components(struct pipe_video_buffer *buffer)
+{
+   struct vl_video_buffer *vbuf = (struct vl_video_buffer *)buffer;
+   struct pipe_sampler_view templ;
+   struct pipe_context *ctx;
+   unsigned plane, chan, slot;
+
+   assert(vbuf);
+
+   ctx = vbuf->base.context;
+   slot = 0;
+
+   for (plane = 0; plane < vbuf->num_planes; ++plane) {
+      struct pipe_resource *res = vbuf->resources[plane];
+      unsigned nr_components = util_format_get_nr_components(res->format);
+
+      for (chan = 0; chan < nr_components; ++chan, ++slot) {
+         assert(slot < VL_MAX_PLANES);
+
+         /* already created by an earlier call */
+         if (vbuf->sampler_view_components[slot])
+            continue;
+
+         memset(&templ, 0, sizeof(templ));
+         u_sampler_view_default_template(&templ, res, res->format);
+         templ.swizzle_b = PIPE_SWIZZLE_RED + chan;
+         templ.swizzle_g = templ.swizzle_b;
+         templ.swizzle_r = templ.swizzle_g;
+         templ.swizzle_a = PIPE_SWIZZLE_ONE;
+
+         vbuf->sampler_view_components[slot] = ctx->create_sampler_view(ctx, res, &templ);
+         if (!vbuf->sampler_view_components[slot]) {
+            /* unwind: drop every component view */
+            for (slot = 0; slot < VL_MAX_PLANES; ++slot)
+               pipe_sampler_view_reference(&vbuf->sampler_view_components[slot], NULL);
+
+            return NULL;
+         }
+      }
+   }
+
+   return vbuf->sampler_view_components;
+}
+
+/**
+ * Return the array of per-plane surfaces, creating any that do not exist
+ * yet with the plane's own format and sampler-view + render-target usage.
+ *
+ * If a surface cannot be created, the surfaces of all used planes are
+ * released and NULL is returned.
+ */
+static struct pipe_surface **
+vl_video_buffer_surfaces(struct pipe_video_buffer *buffer)
+{
+   struct vl_video_buffer *vbuf = (struct vl_video_buffer *)buffer;
+   struct pipe_surface templ;
+   struct pipe_context *ctx;
+   unsigned plane;
+
+   assert(vbuf);
+
+   ctx = vbuf->base.context;
+
+   for (plane = 0; plane < vbuf->num_planes; ++plane) {
+      /* already created by an earlier call */
+      if (vbuf->surfaces[plane])
+         continue;
+
+      memset(&templ, 0, sizeof(templ));
+      templ.format = vbuf->resources[plane]->format;
+      templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+
+      vbuf->surfaces[plane] = ctx->create_surface(ctx, vbuf->resources[plane], &templ);
+      if (!vbuf->surfaces[plane]) {
+         /* unwind: drop every plane surface, including earlier ones */
+         for (plane = 0; plane < vbuf->num_planes; ++plane)
+            pipe_surface_reference(&vbuf->surfaces[plane], NULL);
+
+         return NULL;
+      }
+   }
+
+   return vbuf->surfaces;
+}
+
+/**
+ * Create a video buffer for the given buffer format.
+ *
+ * The requested size is padded: to the next power of two when the screen
+ * does not report PIPE_VIDEO_CAP_NPOT_TEXTURES, otherwise to a multiple of
+ * the macroblock size. Returns NULL when the format has no plane table or
+ * when vl_video_buffer_create_ex fails.
+ */
+struct pipe_video_buffer *
+vl_video_buffer_create(struct pipe_context *pipe,
+                       enum pipe_format buffer_format,
+                       enum pipe_video_chroma_format chroma_format,
+                       unsigned width, unsigned height)
+{
+   const enum pipe_format *plane_formats;
+   struct pipe_video_buffer *vbuf;
+   unsigned padded_width, padded_height;
+   bool need_pot;
+
+   assert(pipe);
+   assert(width > 0 && height > 0);
+
+   need_pot = !pipe->screen->get_video_param(pipe->screen,
+                                             PIPE_VIDEO_PROFILE_UNKNOWN,
+                                             PIPE_VIDEO_CAP_NPOT_TEXTURES);
+
+   plane_formats = vl_video_buffer_formats(pipe->screen, buffer_format);
+   if (!plane_formats)
+      return NULL;
+
+   if (need_pot) {
+      padded_width = util_next_power_of_two(width);
+      padded_height = util_next_power_of_two(height);
+   } else {
+      padded_width = align(width, MACROBLOCK_WIDTH);
+      padded_height = align(height, MACROBLOCK_HEIGHT);
+   }
+
+   vbuf = vl_video_buffer_create_ex(pipe, padded_width, padded_height, 1,
+                                    chroma_format, plane_formats, PIPE_USAGE_STATIC);
+   if (!vbuf)
+      return NULL;
+
+   vbuf->buffer_format = buffer_format;
+   return vbuf;
+}
+
+/**
+ * Create a planar video buffer with explicit per-plane formats.
+ *
+ * Plane 0 is created at width x height x depth (a 3D texture when
+ * depth > 1, otherwise 2D). Planes 1 and 2 are created only when their
+ * entry in resource_formats is not PIPE_FORMAT_NONE, and share a size
+ * derived from chroma_format. All planes are bound as sampler view and
+ * render target.
+ *
+ * Returns the new buffer, or NULL if the structure or any plane resource
+ * cannot be allocated; nothing is leaked on failure.
+ */
+struct pipe_video_buffer *
+vl_video_buffer_create_ex(struct pipe_context *pipe,
+                          unsigned width, unsigned height, unsigned depth,
+                          enum pipe_video_chroma_format chroma_format,
+                          const enum pipe_format resource_formats[VL_MAX_PLANES],
+                          unsigned usage)
+{
+   struct vl_video_buffer *buffer;
+   struct pipe_resource templ;
+   unsigned i;
+
+   assert(pipe);
+
+   buffer = CALLOC_STRUCT(vl_video_buffer);
+   /* allocation can fail; do not dereference a NULL buffer below */
+   if (!buffer)
+      return NULL;
+
+   buffer->base.context = pipe;
+   buffer->base.destroy = vl_video_buffer_destroy;
+   buffer->base.get_sampler_view_planes = vl_video_buffer_sampler_view_planes;
+   buffer->base.get_sampler_view_components = vl_video_buffer_sampler_view_components;
+   buffer->base.get_surfaces = vl_video_buffer_surfaces;
+   buffer->base.chroma_format = chroma_format;
+   buffer->base.width = width;
+   buffer->base.height = height;
+   buffer->num_planes = 1;
+
+   memset(&templ, 0, sizeof(templ));
+   templ.target = depth > 1 ? PIPE_TEXTURE_3D : PIPE_TEXTURE_2D;
+   templ.format = resource_formats[0];
+   templ.width0 = width;
+   templ.height0 = height;
+   templ.depth0 = depth;
+   templ.array_size = 1;
+   templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   templ.usage = usage;
+
+   buffer->resources[0] = pipe->screen->resource_create(pipe->screen, &templ);
+   if (!buffer->resources[0])
+      goto error;
+
+   /* a single plane is only expected for non-subsampled data */
+   if (resource_formats[1] == PIPE_FORMAT_NONE) {
+      assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444);
+      assert(resource_formats[2] == PIPE_FORMAT_NONE);
+      return &buffer->base;
+   } else
+      buffer->num_planes = 2;
+
+   /* the template is reused for the remaining planes with an adjusted size */
+   templ.format = resource_formats[1];
+   if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
+      templ.width0 /= 2;
+      templ.height0 /= 2;
+   } else if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
+      /* NOTE(review): 4:2:2 is normally subsampled horizontally, not
+       * vertically. Left unchanged because code outside this file may
+       * depend on the current layout -- confirm before changing. */
+      templ.height0 /= 2;
+   }
+
+   buffer->resources[1] = pipe->screen->resource_create(pipe->screen, &templ);
+   if (!buffer->resources[1])
+      goto error;
+
+   if (resource_formats[2] == PIPE_FORMAT_NONE)
+      return &buffer->base;
+   else
+      buffer->num_planes = 3;
+
+   templ.format = resource_formats[2];
+   buffer->resources[2] = pipe->screen->resource_create(pipe->screen, &templ);
+   if (!buffer->resources[2])
+      goto error;
+
+   return &buffer->base;
+
+error:
+   /* slots never filled are still NULL from CALLOC_STRUCT */
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      pipe_resource_reference(&buffer->resources[i], NULL);
+   FREE(buffer);
+
+   return NULL;
+}
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h
new file mode 100644
index 00000000000..291d15c1e9d
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.h
@@ -0,0 +1,91 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_ycbcr_buffer_h
+#define vl_ycbcr_buffer_h
+
+#include <pipe/p_context.h>
+#include <pipe/p_video_decoder.h>
+
+#include "vl_defines.h"
+
+/**
+ * implementation of a planar ycbcr buffer
+ */
+
+/* planar buffer for vl data upload and manipulation */
+struct vl_video_buffer
+{
+   struct pipe_video_buffer base;      /* generic part, must stay first: the
+                                          implementation casts between the two */
+   unsigned                 num_planes; /* number of entries used in resources[] */
+   struct pipe_resource     *resources[VL_MAX_PLANES];
+   /* the views and surfaces below are created on first request and cached */
+   struct pipe_sampler_view *sampler_view_planes[VL_MAX_PLANES];
+   struct pipe_sampler_view *sampler_view_components[VL_MAX_PLANES];
+   struct pipe_surface      *surfaces[VL_MAX_PLANES];
+};
+
+/**
+ * get subformats for each plane
+ * returns NULL for buffer formats without a plane table
+ */
+const enum pipe_format *
+vl_video_buffer_formats(struct pipe_screen *screen, enum pipe_format format);
+
+/**
+ * get maximum size of video buffers
+ */
+unsigned
+vl_video_buffer_max_size(struct pipe_screen *screen);
+
+/**
+ * check if video buffer format is supported for a codec/profile
+ * can be used as default implementation of screen->is_video_format_supported
+ */
+boolean
+vl_video_buffer_is_format_supported(struct pipe_screen *screen,
+                                    enum pipe_format format,
+                                    enum pipe_video_profile profile);
+                                    
+/**
+ * creates a video buffer, can be used as a standard implementation for pipe->create_video_buffer
+ * returns NULL on failure
+ */
+struct pipe_video_buffer *
+vl_video_buffer_create(struct pipe_context *pipe,
+                       enum pipe_format buffer_format,
+                       enum pipe_video_chroma_format chroma_format,
+                       unsigned width, unsigned height);
+
+/**
+ * extended create function, gets depth, usage and formats for each plane separately
+ * returns NULL on failure
+ */
+struct pipe_video_buffer *
+vl_video_buffer_create_ex(struct pipe_context *pipe,
+                          unsigned width, unsigned height, unsigned depth,
+                          enum pipe_video_chroma_format chroma_format,
+                          const enum pipe_format resource_formats[VL_MAX_PLANES],
+                          unsigned usage);
+
+#endif /* vl_ycbcr_buffer_h */
diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h
new file mode 100644
index 00000000000..8c5b3aca47d
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_vlc.h
@@ -0,0 +1,138 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * This file is based upon slice_xvmc.c and vlc.h from the xine project,
+ * which in turn is based on mpeg2dec. The following is the original copyright:
+ *
+ * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef vl_vlc_h
+#define vl_vlc_h
+
+/* bit reader state over a byte buffer; bits are consumed from the top
+ * (most significant end) of buf */
+struct vl_vlc
+{
+   uint32_t buf; /* current 32 bit working set of buffer */
+   int bits;     /* used bits in working set */
+   const uint8_t *ptr; /* buffer with stream data */
+   const uint8_t *max; /* ptr+len of buffer */
+};
+
+/**
+ * (Re)load the working set from the current stream position: the next four
+ * bytes become buf, most significant byte first, bits is reset to -16 and
+ * ptr advances by four.
+ *
+ * The caller must ensure at least four bytes are readable at vlc->ptr; no
+ * bounds check against vlc->max is done here.
+ */
+static inline void
+vl_vlc_restart(struct vl_vlc *vlc)
+{
+   const uint8_t *p = vlc->ptr;
+
+   /* widen before shifting: a uint8_t promotes to int, and shifting a byte
+    * >= 0x80 left by 24 would overflow a signed int (undefined behaviour) */
+   vlc->buf = ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
+              ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
+   vlc->bits = -16;
+   vlc->ptr += 4;
+}
+
+/**
+ * Attach the reader to a buffer of len bytes starting at data and load the
+ * first working set via vl_vlc_restart().
+ */
+static inline void
+vl_vlc_init(struct vl_vlc *vlc, const uint8_t *data, unsigned len)
+{
+   const uint8_t *end = data + len;
+
+   vlc->max = end;
+   vlc->ptr = data;
+
+   vl_vlc_restart(vlc);
+}
+
+/**
+ * Shift the working set left by one byte and append the next stream byte.
+ *
+ * Returns true while the advanced pointer is still below vlc->max.
+ */
+static inline bool
+vl_vlc_getbyte(struct vl_vlc *vlc)
+{
+   const uint8_t next = *vlc->ptr;
+
+   vlc->buf = (vlc->buf << 8) | next;
+   vlc->ptr += 1;
+
+   if (vlc->ptr < vlc->max)
+      return true;
+
+   return false;
+}
+
+/* OR the next two stream bytes (first byte in the high half) into buf,
+ * shifted left by shift, and advance ptr by two; vlc is evaluated more
+ * than once, so pass an expression without side effects */
+#define vl_vlc_getword(vlc, shift)                                      \
+do {                                                                    \
+   (vlc)->buf |= (((vlc)->ptr[0] << 8) | (vlc)->ptr[1]) << (shift);     \
+   (vlc)->ptr += 2;                                                     \
+} while (0)
+
+/* make sure that there are at least 16 valid bits in bit_buf:
+ * when bits is non-negative, fetch one more 16 bit word at offset bits and
+ * subtract 16 from bits; vlc is evaluated more than once */
+#define vl_vlc_needbits(vlc)                    \
+do {                                            \
+    if ((vlc)->bits >= 0) {                      \
+	vl_vlc_getword(vlc, (vlc)->bits);       \
+	(vlc)->bits -= 16;                      \
+    }                                           \
+} while (0)
+
+/* make sure that the full 32 bit of the buffer are valid
+ *
+ * Runs vl_vlc_needbits() first, then tops the working set up one byte at a
+ * time: each step shifts buf left, ORs in the next stream byte, advances
+ * ptr and moves bits to the next byte boundary (-8, then -16).
+ * Reads up to two bytes past what needbits consumed; no check against
+ * vlc->max is made here. */
+static inline void
+vl_vlc_need32bits(struct vl_vlc *vlc)
+{
+   vl_vlc_needbits(vlc);
+   /* first step: taken when bits is above -8 */
+   if (vlc->bits > -8) {
+      unsigned n = -vlc->bits;
+      vlc->buf <<= n;
+      vlc->buf |= *vlc->ptr << 8;
+      vlc->bits = -8;
+      vlc->ptr++;
+   }
+   /* second step: taken when bits is above -16, which includes the -8 set
+    * by the first step */
+   if (vlc->bits > -16) {
+      unsigned n = -vlc->bits - 8;
+      vlc->buf <<= n;
+      vlc->buf |= *vlc->ptr;
+      vlc->bits = -16;
+      vlc->ptr++;
+   }
+}
+
+/* remove num valid bits from bit_buf: shifts buf left by num and adds num
+ * to bits; both vlc and num are evaluated twice */
+#define vl_vlc_dumpbits(vlc, num)       \
+do {					\
+    (vlc)->buf <<= (num);		\
+    (vlc)->bits += (num);		\
+} while (0)
+
+/* take num bits from the high part of bit_buf and zero extend them;
+ * num must be in 1..32, since num == 0 would shift by the full width */
+#define vl_vlc_ubits(vlc, num) (((uint32_t)((vlc)->buf)) >> (32 - (num)))
+
+/* take num bits from the high part of bit_buf and sign extend them;
+ * relies on >> of a negative int32_t being an arithmetic shift, which is
+ * implementation-defined in C; num must be in 1..32 */
+#define vl_vlc_sbits(vlc, num) (((int32_t)((vlc)->buf)) >> (32 - (num)))
+
+#endif /* vl_vlc_h */
diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c
new file mode 100644
index 00000000000..58cee0070d8
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_zscan.c
@@ -0,0 +1,604 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+
+#include <pipe/p_screen.h>
+#include <pipe/p_context.h>
+
+#include <util/u_draw.h>
+#include <util/u_sampler.h>
+#include <util/u_inlines.h>
+
+#include <tgsi/tgsi_ureg.h>
+
+#include <vl/vl_defines.h>
+#include <vl/vl_types.h>
+
+#include "vl_zscan.h"
+#include "vl_vertex_buffers.h"
+
+/* semantic indices shared by the vertex shader outputs and fragment shader
+ * inputs below; channel i uses VS_O_VTEX + i */
+enum VS_OUTPUT
+{
+   VS_O_VPOS,
+   VS_O_VTEX
+};
+
+/* identity layout for an 8x8 block: entry i is i */
+const int vl_zscan_linear[] =
+{
+   /* Linear scan pattern */
+    0, 1, 2, 3, 4, 5, 6, 7,
+    8, 9,10,11,12,13,14,15,
+   16,17,18,19,20,21,22,23,
+   24,25,26,27,28,29,30,31,
+   32,33,34,35,36,37,38,39,
+   40,41,42,43,44,45,46,47,
+   48,49,50,51,52,53,54,55,
+   56,57,58,59,60,61,62,63
+};
+
+/* 64 entry layout table (a permutation of 0..63) suitable as the layout
+ * argument of vl_zscan_layout() */
+const int vl_zscan_normal[] =
+{
+   /* Zig-Zag scan pattern */
+    0, 1, 8,16, 9, 2, 3,10,
+   17,24,32,25,18,11, 4, 5,
+   12,19,26,33,40,48,41,34,
+   27,20,13, 6, 7,14,21,28,
+   35,42,49,56,57,50,43,36,
+   29,22,15,23,30,37,44,51,
+   58,59,52,45,38,31,39,46,
+   53,60,61,54,47,55,62,63
+};
+
+/* 64 entry layout table (a permutation of 0..63) suitable as the layout
+ * argument of vl_zscan_layout() */
+const int vl_zscan_alternate[] =
+{
+   /* Alternate scan pattern */
+    0, 8,16,24, 1, 9, 2,10,
+   17,25,32,40,48,56,57,49,
+   41,33,26,18, 3,11, 4,12,
+   19,27,34,42,50,58,35,43,
+   51,59,20,28, 5,13, 6,14,
+   21,29,36,44,52,60,37,45,
+   53,61,22,30, 7,15,23,31,
+   38,46,54,62,39,47,55,63
+};
+
+/*
+ * Build the zscan vertex shader with ureg.
+ *
+ * Inputs are the quad rectangle, the block position and the block number;
+ * outputs are the position plus one texture coordinate per channel.
+ * Returns the driver shader object, or NULL if the ureg program cannot be
+ * created.
+ */
+static void *
+create_vert_shader(struct vl_zscan *zscan)
+{
+   struct ureg_program *shader;
+
+   struct ureg_src scale;
+   struct ureg_src vrect, vpos, block_num;
+
+   struct ureg_dst tmp;
+   struct ureg_dst o_vpos, o_vtex[zscan->num_channels];
+
+   signed i;
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   /* size of one block relative to the destination buffer */
+   scale = ureg_imm2f(shader,
+      (float)BLOCK_WIDTH / zscan->buffer_width,
+      (float)BLOCK_HEIGHT / zscan->buffer_height);
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+   block_num = ureg_DECL_vs_input(shader, VS_I_BLOCK_NUM);
+
+   tmp = ureg_DECL_temporary(shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i);
+
+   /*
+    * o_vpos.xy = (vpos + vrect) * scale
+    * o_vpos.zw = 1.0f
+    *
+    * tmp.xw = block_num.x / blocks_per_line
+    * tmp.y = frac(tmp.x)
+    * tmp.w = floor(tmp.w)
+    *
+    * per channel i:
+    * tmp.x = tmp.y + (i - num_channels / 2) / (blocks_per_line * BLOCK_WIDTH)
+    * o_vtex[i].x = vrect.x / blocks_per_line + tmp.x
+    * o_vtex[i].y = vrect.y
+    * o_vtex[i].z = vpos.z
+    * o_vtex[i].w = tmp.w * blocks_per_line / blocks_total
+    */
+   ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect);
+   ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
+
+   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(block_num, TGSI_SWIZZLE_X),
+            ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));
+
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
+   ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp));
+
+   for (i = 0; i < zscan->num_channels; ++i) {
+      /* i is signed so that the channel offset can be negative */
+      ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y),
+               ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * BLOCK_WIDTH) * (i - (signed)zscan->num_channels / 2)));
+
+      ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
+               ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
+      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect);
+      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos);
+      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp),
+               ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total));
+   }
+
+   ureg_release_temporary(shader, tmp);
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, zscan->pipe);
+}
+
+/*
+ * Build the zscan fragment shader with ureg.
+ *
+ * Sampler 0 is the source data, sampler 1 the scan layout and sampler 2
+ * the quantization data. Channel i of the result is written to component
+ * i of the output. Returns the driver shader object, or NULL if the ureg
+ * program cannot be created.
+ */
+static void *
+create_frag_shader(struct vl_zscan *zscan)
+{
+   struct ureg_program *shader;
+   struct ureg_src vtex[zscan->num_channels];
+
+   struct ureg_src samp_src, samp_scan, samp_quant;
+
+   struct ureg_dst tmp[zscan->num_channels];
+   struct ureg_dst quant, fragment;
+
+   unsigned i;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR);
+
+   samp_src = ureg_DECL_sampler(shader, 0);
+   samp_scan = ureg_DECL_sampler(shader, 1);
+   samp_quant = ureg_DECL_sampler(shader, 2);
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      tmp[i] = ureg_DECL_temporary(shader);
+   quant = ureg_DECL_temporary(shader);
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /*
+    * per channel i:
+    * tmp[i].x = tex(vtex[i], 1)
+    * tmp[i].y = vtex[i].w
+    * tmp[0].<i> = tex(tmp[i], 0)
+    * quant.<i> = tex(vtex[i], 2)
+    *
+    * fragment = tmp[0] * quant * 16
+    */
+   for (i = 0; i < zscan->num_channels; ++i)
+      ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan);
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W));
+
+   for (i = 0; i < zscan->num_channels; ++i) {
+      /* results are gathered into tmp[0]; for i == 0 this overwrites the
+       * coordinate that was just used as the source */
+      ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src);
+      ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant);
+   }
+
+   ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f));
+   ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant));
+
+   /* NOTE(review): quant is not released explicitly, unlike tmp[] */
+   for (i = 0; i < zscan->num_channels; ++i)
+      ureg_release_temporary(shader, tmp[i]);
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, zscan->pipe);
+}
+
+/*
+ * Create the vertex and fragment shaders and store them in zscan.
+ * Returns false if either cannot be created; a vertex shader that was
+ * already created is deleted again in that case.
+ */
+static bool
+init_shaders(struct vl_zscan *zscan)
+{
+   assert(zscan);
+
+   zscan->vs = create_vert_shader(zscan);
+   if (!zscan->vs)
+      return false;
+
+   zscan->fs = create_frag_shader(zscan);
+   if (!zscan->fs) {
+      zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs);
+      return false;
+   }
+
+   return true;
+}
+
+/* Delete the vertex and fragment shaders created by init_shaders(). */
+static void
+cleanup_shaders(struct vl_zscan *zscan)
+{
+   struct pipe_context *ctx;
+
+   assert(zscan);
+
+   ctx = zscan->pipe;
+   ctx->delete_vs_state(ctx, zscan->vs);
+   ctx->delete_fs_state(ctx, zscan->fs);
+}
+
+/*
+ * Create the fixed pipeline state used by the zscan stage: a rasterizer
+ * state, a blend state with blending disabled and all color channels
+ * writable, and three identical nearest-filter sampler states.
+ *
+ * Returns true on success. On failure every object created so far is
+ * deleted again and false is returned.
+ */
+static bool
+init_state(struct vl_zscan *zscan)
+{
+   struct pipe_blend_state blend;
+   struct pipe_rasterizer_state rs_state;
+   struct pipe_sampler_state sampler;
+   unsigned i;
+
+   assert(zscan);
+
+   memset(&rs_state, 0, sizeof(rs_state));
+   rs_state.gl_rasterization_rules = true;
+   zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state);
+   if (!zscan->rs_state)
+      goto error_rs_state;
+
+   memset(&blend, 0, sizeof blend);
+
+   blend.independent_blend_enable = 0;
+   blend.rt[0].blend_enable = 0;
+   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.logicop_enable = 0;
+   blend.logicop_func = PIPE_LOGICOP_CLEAR;
+   /* Needed to allow color writes to FB, even if blending disabled */
+   blend.rt[0].colormask = PIPE_MASK_RGBA;
+   blend.dither = 0;
+   zscan->blend = zscan->pipe->create_blend_state(zscan->pipe, &blend);
+   if (!zscan->blend)
+      goto error_blend;
+
+   for (i = 0; i < 3; ++i) {
+      memset(&sampler, 0, sizeof(sampler));
+      sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
+      sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
+      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+      sampler.compare_func = PIPE_FUNC_ALWAYS;
+      sampler.normalized_coords = 1;
+      zscan->samplers[i] = zscan->pipe->create_sampler_state(zscan->pipe, &sampler);
+      if (!zscan->samplers[i])
+         goto error_samplers;
+   }
+
+   return true;
+
+   /* unwind in reverse order of creation; each label undoes what existed
+    * before the failing step */
+error_samplers:
+   /* i is the index whose creation failed, so only samplers [0, i) exist;
+    * later slots were never written and must not be inspected */
+   while (i-- > 0)
+      zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]);
+
+   zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend);
+
+error_blend:
+   zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state);
+
+error_rs_state:
+   return false;
+}
+
+/*
+ * Delete the three sampler states, the rasterizer state and the blend
+ * state created by init_state().
+ */
+static void
+cleanup_state(struct vl_zscan *zscan)
+{
+   struct pipe_context *ctx;
+   unsigned idx = 0;
+
+   assert(zscan);
+
+   ctx = zscan->pipe;
+
+   while (idx < 3) {
+      ctx->delete_sampler_state(ctx, zscan->samplers[idx]);
+      ++idx;
+   }
+
+   ctx->delete_rasterizer_state(ctx, zscan->rs_state);
+   ctx->delete_blend_state(ctx, zscan->blend);
+}
+
+/**
+ * Build a lookup texture for a scan layout.
+ *
+ * layout is a 64 entry table (see vl_zscan_normal and friends). It is
+ * inverted first, then written into an R32_FLOAT 2D texture that is
+ * blocks_per_line * BLOCK_WIDTH wide and BLOCK_HEIGHT high. The texel at
+ * (x, y) of block i holds
+ *    (inverse_layout[x + y * BLOCK_WIDTH] + i * 64) / total_size
+ * i.e. a normalized address.
+ *
+ * Returns a sampler view of that texture, or NULL if the resource, the
+ * transfer, the mapping or the view cannot be created.
+ */
+struct pipe_sampler_view *
+vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks_per_line)
+{
+   const unsigned total_size = blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT;
+
+   int patched_layout[64];
+
+   struct pipe_resource res_tmpl, *res;
+   struct pipe_sampler_view sv_tmpl, *sv;
+   struct pipe_transfer *buf_transfer;
+   unsigned x, y, i, pitch;
+   float *f;
+
+   struct pipe_box rect =
+   {
+      0, 0, 0,
+      BLOCK_WIDTH * blocks_per_line,
+      BLOCK_HEIGHT,
+      1
+   };
+
+   assert(pipe && layout && blocks_per_line);
+
+   /* invert the table: patched_layout maps a block position to its index
+    * in layout */
+   for (i = 0; i < 64; ++i)
+      patched_layout[layout[i]] = i;
+
+   memset(&res_tmpl, 0, sizeof(res_tmpl));
+   res_tmpl.target = PIPE_TEXTURE_2D;
+   res_tmpl.format = PIPE_FORMAT_R32_FLOAT;
+   res_tmpl.width0 = BLOCK_WIDTH * blocks_per_line;
+   res_tmpl.height0 = BLOCK_HEIGHT;
+   res_tmpl.depth0 = 1;
+   res_tmpl.array_size = 1;
+   res_tmpl.usage = PIPE_USAGE_IMMUTABLE;
+   res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
+
+   res = pipe->screen->resource_create(pipe->screen, &res_tmpl);
+   if (!res)
+      goto error_resource;
+
+   buf_transfer = pipe->get_transfer
+   (
+      pipe, res,
+      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &rect
+   );
+   if (!buf_transfer)
+      goto error_transfer;
+
+   /* row pitch in floats rather than bytes */
+   pitch = buf_transfer->stride / sizeof(float);
+
+   f = pipe->transfer_map(pipe, buf_transfer);
+   if (!f)
+      goto error_map;
+
+   for (i = 0; i < blocks_per_line; ++i)
+      for (y = 0; y < BLOCK_HEIGHT; ++y)
+         for (x = 0; x < BLOCK_WIDTH; ++x) {
+            float addr = patched_layout[x + y * BLOCK_WIDTH] +
+               i * BLOCK_WIDTH * BLOCK_HEIGHT;
+
+            addr /= total_size;
+
+            f[i * BLOCK_WIDTH + y * pitch + x] = addr;
+         }
+
+   pipe->transfer_unmap(pipe, buf_transfer);
+   pipe->transfer_destroy(pipe, buf_transfer);
+
+   memset(&sv_tmpl, 0, sizeof(sv_tmpl));
+   u_sampler_view_default_template(&sv_tmpl, res, res->format);
+   sv = pipe->create_sampler_view(pipe, res, &sv_tmpl);
+   /* drop the local reference whether or not the view was created */
+   pipe_resource_reference(&res, NULL);
+
+   /* The transfer and the resource reference are already released here,
+    * so a failed view creation must not go through the error labels
+    * below: that would destroy the transfer a second time. */
+   return sv;
+
+error_map:
+   pipe->transfer_destroy(pipe, buf_transfer);
+
+error_transfer:
+   pipe_resource_reference(&res, NULL);
+
+error_resource:
+   return NULL;
+}
+
+/**
+ * Store the context and the size parameters in zscan, then run
+ * init_shaders() and init_state() on it.
+ *
+ * \return true on success; false if either step failed, in which case
+ *         shaders that were already created are cleaned up again
+ */
+bool
+vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
+              unsigned buffer_width, unsigned buffer_height,
+              unsigned blocks_per_line, unsigned blocks_total,
+              unsigned num_channels)
+{
+   assert(zscan && pipe);
+
+   zscan->pipe = pipe;
+   zscan->num_channels = num_channels;
+   zscan->buffer_width = buffer_width;
+   zscan->buffer_height = buffer_height;
+   zscan->blocks_total = blocks_total;
+   zscan->blocks_per_line = blocks_per_line;
+
+   if (init_shaders(zscan)) {
+      if (init_state(zscan))
+         return true;
+
+      /* the state setup failed: undo the shader setup */
+      cleanup_shaders(zscan);
+   }
+
+   return false;
+}
+
+/**
+ * Counterpart of vl_zscan_init(): runs cleanup_shaders() and then
+ * cleanup_state() on zscan.
+ */
+void
+vl_zscan_cleanup(struct vl_zscan *zscan)
+{
+   assert(zscan);
+
+   cleanup_shaders(zscan);
+   cleanup_state(zscan);
+}
+
+/**
+ * Set up a zscan buffer that samples from src and renders into dst.
+ *
+ * The buffer is cleared, takes a reference on src and dst, derives its
+ * viewport and framebuffer state from the size of dst and gets a new
+ * quantizer matrix texture (3D, R8_UNORM, BLOCK_WIDTH * blocks_per_line by
+ * BLOCK_HEIGHT by 2) whose view replicates the X channel into all channels.
+ * buffer->layout is left NULL; see vl_zscan_set_layout().
+ *
+ * \param zscan   zscan instance the buffer belongs to
+ * \param buffer  buffer to initialize, any previous content is overwritten
+ * \param src     sampler view stored in buffer->src
+ * \param dst     surface used as the only color buffer, must not be NULL
+ * \return true on success; false if the texture or its view could not be
+ *         created, in which case the references on src and dst are released
+ */
+bool
+vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
+                     struct pipe_sampler_view *src, struct pipe_surface *dst)
+{
+   struct pipe_resource res_tmpl, *res;
+   struct pipe_sampler_view sv_tmpl;
+
+   /* dst is dereferenced below for its size */
+   assert(zscan && buffer && dst);
+
+   memset(buffer, 0, sizeof(struct vl_zscan_buffer));
+
+   buffer->zscan = zscan;
+
+   pipe_sampler_view_reference(&buffer->src, src);
+
+   buffer->viewport.scale[0] = dst->width;
+   buffer->viewport.scale[1] = dst->height;
+   buffer->viewport.scale[2] = 1;
+   buffer->viewport.scale[3] = 1;
+   buffer->viewport.translate[0] = 0;
+   buffer->viewport.translate[1] = 0;
+   buffer->viewport.translate[2] = 0;
+   buffer->viewport.translate[3] = 0;
+
+   buffer->fb_state.width = dst->width;
+   buffer->fb_state.height = dst->height;
+   buffer->fb_state.nr_cbufs = 1;
+   pipe_surface_reference(&buffer->fb_state.cbufs[0], dst);
+
+   /* quantizer matrix texture, filled later by vl_zscan_upload_quant() */
+   memset(&res_tmpl, 0, sizeof(res_tmpl));
+   res_tmpl.target = PIPE_TEXTURE_3D;
+   res_tmpl.format = PIPE_FORMAT_R8_UNORM;
+   res_tmpl.width0 = BLOCK_WIDTH * zscan->blocks_per_line;
+   res_tmpl.height0 = BLOCK_HEIGHT;
+   res_tmpl.depth0 = 2;
+   res_tmpl.array_size = 1;
+   res_tmpl.usage = PIPE_USAGE_IMMUTABLE;
+   res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
+
+   res = zscan->pipe->screen->resource_create(zscan->pipe->screen, &res_tmpl);
+   if (!res)
+      goto error;
+
+   memset(&sv_tmpl, 0, sizeof(sv_tmpl));
+   u_sampler_view_default_template(&sv_tmpl, res, res->format);
+   sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = TGSI_SWIZZLE_X;
+   buffer->quant = zscan->pipe->create_sampler_view(zscan->pipe, res, &sv_tmpl);
+
+   /* the local reference is not needed any more, whether or not a view exists */
+   pipe_resource_reference(&res, NULL);
+   if (!buffer->quant)
+      goto error;
+
+   return true;
+
+error:
+   /* do not keep src and dst referenced by a buffer that failed to initialize */
+   pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL);
+   pipe_sampler_view_reference(&buffer->src, NULL);
+
+   return false;
+}
+
+/**
+ * Drop the references held by a zscan buffer: the src, layout and quant
+ * sampler views and the surface in fb_state.cbufs[0].
+ */
+void
+vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer)
+{
+   assert(buffer);
+
+   pipe_sampler_view_reference(&buffer->src, NULL);
+   pipe_sampler_view_reference(&buffer->layout, NULL);
+   pipe_sampler_view_reference(&buffer->quant, NULL);
+   pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL);
+}
+
+/**
+ * Select the layout texture of a buffer: stores layout in buffer->layout
+ * through pipe_sampler_view_reference(). Neither argument may be NULL.
+ */
+void
+vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout)
+{
+   assert(buffer);
+   assert(layout);
+
+   pipe_sampler_view_reference(&buffer->layout, layout);
+}
+
+/**
+ * Write the two quantizer matrices into the quant texture of a buffer.
+ *
+ * Both slices of the texture are mapped for writing. The non-intra matrix
+ * goes to the start of the mapping, the intra matrix BLOCK_HEIGHT rows
+ * further in; each matrix is repeated once per block of the line.
+ * Nothing is written if the transfer cannot be created or mapped.
+ *
+ * NOTE(review): the offset of the second slice is computed as
+ * BLOCK_HEIGHT * stride, which assumes that the slices are packed without
+ * extra padding in the mapping - confirm against the transfer's slice stride.
+ *
+ * \param buffer            buffer whose quant texture is updated
+ * \param intra_matrix      64 entries, indexed as x + y * BLOCK_WIDTH
+ * \param non_intra_matrix  64 entries, indexed as x + y * BLOCK_WIDTH
+ */
+void
+vl_zscan_upload_quant(struct vl_zscan_buffer *buffer,
+                      const uint8_t intra_matrix[64],
+                      const uint8_t non_intra_matrix[64])
+{
+   struct pipe_context *pipe;
+   struct pipe_transfer *transfer;
+   uint8_t *non_intra_dst, *intra_dst;
+   unsigned block, row, col, stride;
+
+   /* x, y, z, width, height, depth; the width is scaled below */
+   struct pipe_box box = { 0, 0, 0, BLOCK_WIDTH, BLOCK_HEIGHT, 2 };
+
+   assert(buffer);
+   assert(intra_matrix);
+   assert(non_intra_matrix);
+
+   pipe = buffer->zscan->pipe;
+
+   box.width *= buffer->zscan->blocks_per_line;
+
+   transfer = pipe->get_transfer(pipe, buffer->quant->texture, 0,
+                                 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+                                 &box);
+   if (!transfer)
+      return;
+
+   stride = transfer->stride;
+
+   non_intra_dst = pipe->transfer_map(pipe, transfer);
+   if (non_intra_dst) {
+      intra_dst = non_intra_dst + BLOCK_HEIGHT * stride;
+
+      for (block = 0; block < buffer->zscan->blocks_per_line; ++block) {
+         for (row = 0; row < BLOCK_HEIGHT; ++row) {
+            for (col = 0; col < BLOCK_WIDTH; ++col) {
+               const unsigned from = col + row * BLOCK_WIDTH;
+               const unsigned to = block * BLOCK_WIDTH + row * stride + col;
+
+               intra_dst[to] = intra_matrix[from];
+               non_intra_dst[to] = non_intra_matrix[from];
+            }
+         }
+      }
+
+      pipe->transfer_unmap(pipe, transfer);
+   }
+
+   /* the transfer exists at this point whether or not the mapping worked */
+   pipe->transfer_destroy(pipe, transfer);
+}
+
+/**
+ * Run the zscan pass for a buffer.
+ *
+ * Binds the rasterizer, blend and sampler states and the shaders of the
+ * zscan instance together with the framebuffer, viewport and sampler views
+ * of the buffer, then draws one quad (4 vertices) with num_instances
+ * instances.
+ *
+ * \param buffer         buffer to render
+ * \param num_instances  instance count passed to the draw call
+ */
+void
+vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances)
+{
+   struct vl_zscan *zscan;
+   struct pipe_sampler_view *views[3];
+
+   assert(buffer);
+
+   zscan = buffer->zscan;
+
+   /*
+    * Collect the views in a real array instead of passing &buffer->src and
+    * relying on src, layout and quant being adjacent struct members.
+    */
+   views[0] = buffer->src;
+   views[1] = buffer->layout;
+   views[2] = buffer->quant;
+
+   zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state);
+   zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend);
+   zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 3, zscan->samplers);
+   zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state);
+   zscan->pipe->set_viewport_state(zscan->pipe, &buffer->viewport);
+   zscan->pipe->set_fragment_sampler_views(zscan->pipe, 3, views);
+   zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs);
+   zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs);
+   util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
+}
diff --git a/src/gallium/auxiliary/vl/vl_zscan.h b/src/gallium/auxiliary/vl/vl_zscan.h
new file mode 100644
index 00000000000..be12b8e873a
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_zscan.h
@@ -0,0 +1,103 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_zscan_h
+#define vl_zscan_h
+
+#include <pipe/p_compiler.h>
+#include <pipe/p_state.h>
+
+/*
+ * shader based zscan and quantization
+ * expects vl_vertex_buffers to be used as a todo list
+ *
+ * A vl_zscan is filled in by vl_zscan_init() and released by
+ * vl_zscan_cleanup(); every vl_zscan_buffer points back to one.
+ */
+struct vl_zscan
+{
+   /* context the state objects and shaders below are bound and deleted on */
+   struct pipe_context *pipe;
+
+   /* stored as passed to vl_zscan_init() */
+   unsigned buffer_width;
+   unsigned buffer_height;
+
+   /* stored as passed to vl_zscan_init() */
+   unsigned num_channels;
+
+   /* blocks_per_line also sets the width, in blocks, of the quant texture */
+   unsigned blocks_per_line;
+   unsigned blocks_total;
+
+   /* rasterizer and blend state objects bound by vl_zscan_render() */
+   void *rs_state;
+   void *blend;
+
+   /* sampler states, bound together as fragment samplers 0 to 2 */
+   void *samplers[3];
+
+   /* vertex and fragment shader bound by vl_zscan_render() */
+   void *vs, *fs;
+};
+
+/*
+ * Per render target state; set up by vl_zscan_init_buffer() and released by
+ * vl_zscan_cleanup_buffer().
+ */
+struct vl_zscan_buffer
+{
+   /* instance this buffer was initialized with */
+   struct vl_zscan *zscan;
+
+   /* both derived from the size of the destination surface */
+   struct pipe_viewport_state viewport;
+   struct pipe_framebuffer_state fb_state;
+
+   /*
+    * Bound by vl_zscan_render() as fragment sampler views 0, 1 and 2
+    * respectively; keep these three members together and in this order.
+    * layout is set through vl_zscan_set_layout(), quant is created by
+    * vl_zscan_init_buffer() and filled by vl_zscan_upload_quant().
+    */
+   struct pipe_sampler_view *src, *layout, *quant;
+
+   /*
+    * NOTE(review): not written by vl_zscan_init_buffer(), which keeps the
+    * destination surface in fb_state.cbufs[0] instead - confirm that this
+    * member is used at all.
+    */
+   struct pipe_surface *dst;
+};
+
+/*
+ * Predefined scan orders, defined in vl_zscan.c.
+ * NOTE(review): presumably 64 entries each, as vl_zscan_layout() expects -
+ * confirm against the definitions.
+ */
+extern const int vl_zscan_linear[];
+extern const int vl_zscan_normal[];
+extern const int vl_zscan_alternate[];
+
+/**
+ * Create a R32_FLOAT lookup texture for the given 64 entry layout, repeated
+ * for blocks_per_line blocks, and return a sampler view of it.
+ * Returns NULL on failure.
+ */
+struct pipe_sampler_view *
+vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks_per_line);
+
+/**
+ * Initialize zscan for the given context and sizes and set up its shaders
+ * and state objects. Returns false on failure.
+ */
+bool
+vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
+              unsigned buffer_width, unsigned buffer_height,
+              unsigned blocks_per_line, unsigned blocks_total,
+              unsigned num_channels);
+
+/**
+ * Release the shaders and state objects of an initialized zscan instance.
+ */
+void
+vl_zscan_cleanup(struct vl_zscan *zscan);
+
+/**
+ * Initialize buffer to sample from src and render into dst; references both
+ * and creates the quantizer matrix texture. Returns false if the texture or
+ * its sampler view could not be created.
+ */
+bool
+vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
+                     struct pipe_sampler_view *src, struct pipe_surface *dst);
+
+/**
+ * Drop the sampler view and surface references held by buffer.
+ */
+void
+vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer);
+
+/**
+ * Set the layout texture used by buffer, e.g. one created by
+ * vl_zscan_layout(). layout must not be NULL.
+ */
+void
+vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout);
+
+/**
+ * Upload the intra and non-intra quantizer matrices (64 entries each) into
+ * the quant texture of buffer.
+ */
+void
+vl_zscan_upload_quant(struct vl_zscan_buffer *buffer,
+                      const uint8_t intra_matrix[64],
+                      const uint8_t non_intra_matrix[64]);
+
+/**
+ * Bind the zscan state and the state of buffer, then draw num_instances
+ * instances of one quad.
+ */
+void
+vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances);
+
+#endif