diff options
| author | Jimmy Rentz <jb17bsome@gmail.com> | 2010-04-08 01:31:13 -0400 | 
|---|---|---|
| committer | Jimmy Rentz <jb17bsome@gmail.com> | 2010-04-08 20:57:53 -0400 | 
| commit | 538d6ef8aac1cd861f6336e24e79a315fe58aba0 (patch) | |
| tree | 922affc6aa30647e66937e5f78b1398201e21423 | |
| parent | de2f7caff0bfd6c487fbc28dbf7eafef8a993c56 (diff) | |
Initial commit for the linux vpe kernel changes.nvfx-vpe
Some notes about vpe:
* Mpeg2 idct/mc decode engine.
* Available on the nv40; I think the nv30 has it as well (and maybe other older cards too).
Well, I cannot verify on nv30 since I don't have one.
* It is composed of the hw engine, fifo, output surfaces and the mmio control registers.
* Fifo pushbuffer can be allocated from vram or agp - Though, agp is not working right now.  Also, I think pci *might* be supported but I cannot tell.
* Output surfaces (for luma+chroma data) can only be allocated from vram.
The kernel portion works like so:
* Channel creation involves setting up the engine via mmio and allocating the pushbuffer
* All cmds are written in the client app to a pushbuffer - This can be simple user-space buffer or the actual hardware pushbuffer.
* Client app calls the fire ioctl to kick off the decode engine.
* Client app calls the query ioctl to see when an output surface is done rendering/being used.
Kernel notes:
* Both user and kernel submission of pushbuffers are supported - MMIO access
is not allowed from user-space so you still need to call the fire ioctl.
* Vram output surfaces (luma+chroma) must be pinned in memory until the render is done -
The engine must finish using the output surfaces before they can be unpinned.  A sequence type fence exists that can be used to see when the engine is done with a surface.
An ioctl exists that lets you query this fence and free it when needed.  The kernel will automatically unpin/release the surface once you fire some more commands.
* Performance is not where I want it yet - So, the code is not up to par with nvidia's driver right now.
I end up making the engine work too hard which causes stalls, etc.  I am pretty sure the issue is getting the correct delays in there since I had this stuff working pretty good back when I originally worked on this.
More documentation for this exists in nouveau_vpe_hw.h.
Signed-off-by: Jimmy Rentz <jb17bsome@gmail.com>
| -rw-r--r-- | drivers/gpu/drm/nouveau/Makefile | 2 | ||||
| -rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_channel.c | 12 | ||||
| -rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_debugfs.c | 109 | ||||
| -rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_drv.h | 69 | ||||
| -rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_reg.h | 31 | ||||
| -rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_vd_vpe.c | 1147 | ||||
| -rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_vpe_hw.h | 150 | ||||
| -rw-r--r-- | include/drm/nouveau_drm.h | 50 | 
8 files changed, 1569 insertions, 1 deletions
| diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile index 453df3f6053f..4c4525a25522 100644 --- a/drivers/gpu/drm/nouveau/Makefile +++ b/drivers/gpu/drm/nouveau/Makefile @@ -22,7 +22,7 @@ nouveau-y := nouveau_drv.o nouveau_state.o nouveau_channel.o nouveau_mem.o \               nv50_cursor.o nv50_display.o nv50_fbcon.o \               nv04_dac.o nv04_dfp.o nv04_tv.o nv17_tv.o nv17_tv_modes.o \               nv04_crtc.o nv04_display.o nv04_cursor.o nv04_fbcon.o \ -             nv17_gpio.o nv50_gpio.o +             nv17_gpio.o nv50_gpio.o nouveau_vd_vpe.o  nouveau-$(CONFIG_DRM_NOUVEAU_DEBUG) += nouveau_debugfs.o  nouveau-$(CONFIG_COMPAT) += nouveau_ioc32.o diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c index 1fc57ef58295..6ed9fae2a7ed 100644 --- a/drivers/gpu/drm/nouveau/nouveau_channel.c +++ b/drivers/gpu/drm/nouveau/nouveau_channel.c @@ -338,6 +338,14 @@ nouveau_channel_cleanup(struct drm_device *dev, struct drm_file *file_priv)  		if (chan && chan->file_priv == file_priv)  			nouveau_channel_free(chan);  	} +	 +	if (dev_priv->vpe_channel) { +		NV_DEBUG(dev, "clearing VPE channel from file_priv\n"); +		struct nouveau_vd_vpe_channel *vpe_channel = dev_priv->vpe_channel; +		 +		if (vpe_channel->file_priv == file_priv) +			nouveau_vpe_channel_free(vpe_channel); +	}  }  int @@ -442,6 +450,10 @@ struct drm_ioctl_desc nouveau_ioctls[] = {  	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH),  	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH),  	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH), +	DRM_IOCTL_DEF(DRM_NOUVEAU_VD_VPE_CHANNEL_ALLOC, nouveau_vd_vpe_ioctl_channel_alloc, DRM_AUTH), +	DRM_IOCTL_DEF(DRM_NOUVEAU_VD_VPE_CHANNEL_FREE, nouveau_vd_vpe_ioctl_channel_free, DRM_AUTH), +	DRM_IOCTL_DEF(DRM_NOUVEAU_VD_VPE_PUSHBUF_FIRE, nouveau_vd_vpe_ioctl_pushbuf_fire, DRM_AUTH), +	
DRM_IOCTL_DEF(DRM_NOUVEAU_VD_VPE_SURFACE_QUERY, nouveau_vd_vpe_ioctl_surface_query, DRM_AUTH),  };  int nouveau_max_ioctl = DRM_ARRAY_SIZE(nouveau_ioctls); diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index a251886a0ce6..ad3a736e983c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -115,6 +115,115 @@ nouveau_debugfs_channel_fini(struct nouveau_channel *chan)  	}  } +static  +int nouveau_debugfs_vpe_channel_info(struct seq_file *m, void *data) +{ +	struct drm_info_node *node = (struct drm_info_node *) m->private; +	struct nouveau_vd_vpe_channel *chan = node->info_ent->data; +	int i; +	uint32_t val; +	 +	seq_printf(m, "cpu fifo state:\n"); +	seq_printf(m, "           max: 0x%08x\n", chan->dma.max << 2); +	seq_printf(m, "           cur: 0x%08x\n", chan->dma.cur << 2); +	seq_printf(m, "           put: 0x%08x\n", chan->dma.put << 2); +	seq_printf(m, "          free: 0x%08x\n", chan->dma.free << 2); +					 +	seq_printf(m, "vpe fifo state:\n"); +	seq_printf(m, "           config: 0x%08x\n", +					nv_rd32(chan->dev, NV_VPE_MPEG2_USER_CONFIG)); +	seq_printf(m, "           offset: 0x%08x\n", +					nv_rd32(chan->dev, NV_VPE_MPEG2_USER_OFFSET)); +	seq_printf(m, "           size: 0x%08x\n", +					nv_rd32(chan->dev, NV_VPE_MPEG2_USER_SIZE)); +	seq_printf(m, "           get: 0x%08x\n", +					nv_rd32(chan->dev, NV_VPE_MPEG2_USER_GET)); +	seq_printf(m, "           put: 0x%08x\n", +					nv_rd32(chan->dev, NV_VPE_MPEG2_USER_PUT)); +	seq_printf(m, "           get.seq: 0x%08x\n", +					nv_rd32(chan->dev, NV_VPE_MPEG2_USER_GET)); +	seq_printf(m, "           put.seq: 0x%08x\n", +					chan->dma.sequence); +					 +	seq_printf(m, "vpe engine status:\n"); +	seq_printf(m, "           engine_config_1: 0x%08x\n", +					nv_rd32(chan->dev, NV_VPE_MPEG2_ENGINE_CONFIG_1)); +	seq_printf(m, "           engine_config_2: 0x%08x\n", +					nv_rd32(chan->dev, NV_VPE_MPEG2_ENGINE_CONFIG_2)); +	
seq_printf(m, "           engine_setup_1: 0x%08x\n", +					nv_rd32(chan->dev, NV_VPE_MPEG2_ENGINE_SETUP_1)); +	seq_printf(m, "           engine_setup_2: 0x%08x\n", +					nv_rd32(chan->dev, NV_VPE_MPEG2_ENGINE_SETUP_2)); +	seq_printf(m, "           engine_reader_config: 0x%08x\n", +					nv_rd32(chan->dev, NV_VPE_MPEG2_ENGINE_READER_CONFIG)); +	seq_printf(m, "           engine_status: 0x%08x\n", +					nv_rd32(chan->dev, NV_VPE_MPEG2_ENGINE_STATUS)); +	 +	seq_printf(m, "vpe decode surface config:\n"); +	val = nv_rd32(chan->dev, NV_VPE_MPEG2_SURFACE_INFO); +	seq_printf(m, "           info: 0x%08X\n", +					val); +	val = nv_rd32(chan->dev, NV_VPE_MPEG2_CONTEXT_DIMENSIONS); +	seq_printf(m, "           dimensions: width = %d, height = %d\n", +					(val >> 16) & 0xFFF, val & 0xFFF); +					 +	seq_printf(m, "vpe decode surface fb offsets:\n");				 +	for (i = 0; i < ARRAY_SIZE(chan->surface); i++) { +		seq_printf(m, "         luma.[0x%08X] = 0x%08X\n", +					i, nv_rd32(chan->dev, NV_VPE_MPEG2_LUMA_SURFACE_OFFSET_GET(i))); +		seq_printf(m, "       chroma.[0x%08X] = 0x%08X\n", +					i, nv_rd32(chan->dev, NV_VPE_MPEG2_CHROMA_SURFACE_OFFSET_GET(i))); +	} +					 +	return 0; +} + +int nouveau_debugfs_vpe_channel_init(struct nouveau_vd_vpe_channel *chan) +{ +	struct drm_nouveau_private *dev_priv = chan->dev->dev_private; +	struct drm_minor *minor = chan->dev->primary; +	int ret; + +	if (!dev_priv->debugfs.vpe_channel_root) { +		dev_priv->debugfs.vpe_channel_root = +			debugfs_create_dir("vpe_channel", minor->debugfs_root); +		if (!dev_priv->debugfs.vpe_channel_root) +			return -ENOENT; +	} + +	strcpy(chan->debugfs.name, "0"); +	chan->debugfs.info.name = chan->debugfs.name; +	chan->debugfs.info.show = nouveau_debugfs_vpe_channel_info; +	chan->debugfs.info.driver_features = 0; +	chan->debugfs.info.data = chan; + +	ret = drm_debugfs_create_files(&chan->debugfs.info, 1, +				       dev_priv->debugfs.vpe_channel_root, +				       chan->dev->primary); +	if (ret == 0) +		
chan->debugfs.active = true; +	return ret; +} + +void +nouveau_debugfs_vpe_channel_fini(struct nouveau_vd_vpe_channel *chan) +{ +	struct drm_nouveau_private *dev_priv = chan->dev->dev_private; + +	if (!chan->debugfs.active) +		return; + +	drm_debugfs_remove_files(&chan->debugfs.info, 1, chan->dev->primary); +	chan->debugfs.active = false; + +	if (chan == dev_priv->vpe_channel) { +		debugfs_remove(dev_priv->debugfs.vpe_channel_root); +		dev_priv->debugfs.vpe_channel_root = NULL; +	} +} + + +  static int  nouveau_debugfs_chipset_info(struct seq_file *m, void *data)  { diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index c20f52ec2d67..1040b363fe1c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -489,6 +489,38 @@ struct nv04_mode_state {  	struct nv04_crtc_reg crtc_reg[2];  }; +struct nouveau_vd_vpe_surface { +	struct nouveau_bo *luma_bo; +	struct nouveau_bo *chroma_bo; +	uint32_t dma_sequence; +}; + +struct nouveau_vd_vpe_channel { +	struct drm_device *dev; +	struct drm_file *file_priv; +	uint32_t width; +	uint32_t height; +	 +	/* Push buffer state */ +	struct { +		uint32_t max; +		uint32_t cur; +		uint32_t put; +		uint32_t free; +		uint32_t sequence; +		/* access via pushbuf_bo */ +	} dma; + +	struct nouveau_bo *pushbuf_bo; +	struct nouveau_vd_vpe_surface surface[8]; +	 +	struct { +		bool active; +		char name[32]; +		struct drm_info_list info; +	} debugfs; +}; +  enum nouveau_card_type {  	NV_04      = 0x00,  	NV_10      = 0x10, @@ -620,7 +652,10 @@ struct drm_nouveau_private {  	struct {  		struct dentry *channel_root; +		struct dentry *vpe_channel_root;  	} debugfs; +	 +	struct nouveau_vd_vpe_channel *vpe_channel;  };  static inline struct drm_nouveau_private * @@ -666,6 +701,16 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo)  	(ch) = nv->fifos[(id)];                                  \  } while (0) +#define NOUVEAU_GET_VPE_CHANNEL_WITH_RETURN(id, ch) 
do {    \ +	struct drm_nouveau_private *nv = dev->dev_private;       \ +	if (nv->vpe_channel && (nv->vpe_channel->file_priv != id) ) {           \ +		NV_ERROR(dev, "pid %d doesn't own vpe channel\n", \ +			 DRM_CURRENTPID);                  \ +		return -EPERM;                                   \ +	}                                                        \ +	(ch) = nv->vpe_channel;                                  \ +} while (0) +  /* nouveau_drv.c */  extern int nouveau_noagp;  extern int nouveau_duallink; @@ -818,6 +863,8 @@ extern int  nouveau_debugfs_init(struct drm_minor *);  extern void nouveau_debugfs_takedown(struct drm_minor *);  extern int  nouveau_debugfs_channel_init(struct nouveau_channel *);  extern void nouveau_debugfs_channel_fini(struct nouveau_channel *); +extern int nouveau_debugfs_vpe_channel_init(struct nouveau_vd_vpe_channel *); +extern void nouveau_debugfs_vpe_channel_fini(struct nouveau_vd_vpe_channel *);  #else  static inline int  nouveau_debugfs_init(struct drm_minor *minor) @@ -839,6 +886,17 @@ static inline void  nouveau_debugfs_channel_fini(struct nouveau_channel *chan)  {  } + +static inline int +nouveau_debugfs_vpe_channel_init(struct nouveau_vd_vpe_channel *chan) +{ +	return 0; +} + +static inline void +nouveau_debugfs_vpe_channel_fini(struct nouveau_vd_vpe_channel *chan) +{ +}  #endif  /* nouveau_dma.c */ @@ -1156,6 +1214,17 @@ extern int nouveau_gem_ioctl_cpu_fini(struct drm_device *, void *,  				      struct drm_file *);  extern int nouveau_gem_ioctl_info(struct drm_device *, void *,  				  struct drm_file *); +				  			   +/* nouveau_vd_vpe.c */ +extern void nouveau_vpe_channel_free(struct nouveau_vd_vpe_channel *vpe_channel); +extern int nouveau_vd_vpe_ioctl_channel_alloc(struct drm_device *, void *, +				  struct drm_file *); +extern int nouveau_vd_vpe_ioctl_channel_free(struct drm_device *, void *, +				  struct drm_file *); +extern int nouveau_vd_vpe_ioctl_pushbuf_fire(struct drm_device *, void *, +				  struct drm_file *); 
+extern int nouveau_vd_vpe_ioctl_surface_query(struct drm_device *, void *, +				  struct drm_file *);  /* nv17_gpio.c */  int nv17_gpio_get(struct drm_device *dev, enum dcb_gpio_tag tag); diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h index aa9b310e41be..29f0d73d077f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_reg.h +++ b/drivers/gpu/drm/nouveau/nouveau_reg.h @@ -131,6 +131,37 @@  #define NV04_PTIMER_TIME_1                                 0x00009410  #define NV04_PTIMER_ALARM_0                                0x00009420 +/* The NV VPE MPEG2 control registers that exist on NV40 and NV30 and  + * some other older boards possibly.*/ +#define NV_VPE_MPEG2_ENGINE_CONFIG_1      				   0x0000B0E0 +#define NV_VPE_MPEG2_ENGINE_CONFIG_2      				   0x0000B0E8 +#define NV_VPE_MPEG2_ENGINE_SETUP_1       				   0x0000B100  +#define NV_VPE_MPEG2_ENGINE_SETUP_2       				   0x0000B140  +#define NV_VPE_MPEG2_ENGINE_STATUS        				   0x0000B200  +#define NV_VPE_MPEG2_ENGINE_READER_CONFIG 				   0x0000B204    +#define NV_VPE_MPEG2_USER_CONFIG  		   		           0x0000B300 +#	define NV_VPE_MPEG2_USER_NOT_PRESENT     	           0x020F0200 +#	define NV_VPE_MPEG2_USER_PRESENT     	                0x02001ec1  +#	define NV_VPE_MPEG2_USER_VRAM                           (0 << 16) +#	define NV_VPE_MPEG2_USER_AGP_OR_PCI                     (1 << 16) +#	define NV_VPE_MPEG2_USER_AGP_OR_PCI_READY               (2 << 16) +/* Complete guess here about pcie.*/	       +#	define NV_VPE_MPEG2_USER_PCIE                           (8 << 16) +#define NV_VPE_MPEG2_UNKNOWN_SETUP_3 					   0x0000B314 +#define NV_VPE_MPEG2_USER_OFFSET 				   	       0x0000B320 +#define NV_VPE_MPEG2_USER_SIZE     				           0x0000B324 +#define NV_VPE_MPEG2_USER_PUT      				           0x0000B328 +#define NV_VPE_MPEG2_USER_GET      				           0x0000B330 +#define NV_VPE_MPEG2_ENGINE_CONTROL      				   0x0000B32C +#	define NV_VPE_MPEG2_ENGINE_STOP   				     0 +#	define 
NV_VPE_MPEG2_ENGINE_START  				   	 1       +#define NV_VPE_MPEG2_SEQUENCE_GET        				   0x0000B340 +#define NV_VPE_MPEG2_SURFACE_INFO        				   0x0000B378 +#define NV_VPE_MPEG2_CONTEXT_DIMENSIONS 				   0x0000B37C	     +#define NV_VPE_MPEG2_LUMA_SURFACE_OFFSET_GET(surface)	   (0x0000B450 + (surface * 8) ) +#define NV_VPE_MPEG2_CHROMA_SURFACE_OFFSET_GET(surface)    (0x0000B454 + (surface * 8) )	 +#define NV_VPE_MPEG2_ENGINE_STATUS_1  					   0x0000B848 +  #define NV04_PFB_CFG0                                      0x00100200  #define NV04_PFB_CFG1                                      0x00100204  #define NV40_PFB_020C                                      0x0010020C diff --git a/drivers/gpu/drm/nouveau/nouveau_vd_vpe.c b/drivers/gpu/drm/nouveau/nouveau_vd_vpe.c new file mode 100644 index 000000000000..52a2ed9f76de --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_vd_vpe.c @@ -0,0 +1,1147 @@ +/* + * Copyright (C) 2010 Jimmy Rentz + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "drmP.h" +#include "drm.h" + +#include "nouveau_drv.h" +#include "nouveau_drm.h" +#include "nouveau_vpe_hw.h" + +/* VPE MPEG2 HW notes: + * - There is a 64byte fetch size.  That is why each set of commands must + * be aligned on a 64 byte boundary for firing. + * - One fetch of cmds seem to process in 1 microsecond on my nv4e.   + * However, I presume this can vary based on the hw and nature of commands. + * - Each firing of a set of commands must be followed by a small delay. + * The main reason is to avoid overwhelming the hw.   + * The delays below were determined from testing/measuring.  I doubt they +   are perfect and they could be tweaked a bit.*/ + +/* Channel/Surface init commands process in little or no time.*/ +#define VPE_UDELAY_FIRE_INIT        4 + +/* Normal firing needs this type of delay.*/ +#define VPE_UDELAY_FIRE_NORMAL      35 + +/* Need a longer delay at the end of the fifo since it takes longer.*/ +#define VPE_UDELAY_FIRE_END        100 + +/* Set if you want to validate vpe user cmds. + * Otherwise, they are copied asis. + * The reason this exists is because a user could set a vpe surface to  + * point to the visible framebuffer, etc.  
However, the user could never + * make a vpe surface use a gart address since it isn't supported by the + * hardware.*/ +/*#define NOUVEAU_VPE_VALIDATE_USER_CMDS*/ + +/* All these functions up here need to be exported somehow.*/ + +/* Needed to copy userspace pushbuffers that are sent to the vpe hw.*/ +static inline void * +_u_memcpya(uint64_t user, unsigned nmemb, unsigned size) +{ +	void *mem; +	void __user *userptr = (void __force __user *)(uintptr_t)user; + +	mem = kmalloc(nmemb * size, GFP_KERNEL); +	if (!mem) +		return ERR_PTR(-ENOMEM); + +	if (DRM_COPY_FROM_USER(mem, userptr, nmemb * size)) { +		kfree(mem); +		return ERR_PTR(-EFAULT); +	} + +	return mem; +} + +/* Internal */			  +static inline void +nouveau_vpe_cmd_write(struct nouveau_vd_vpe_channel *vpe_channel, uint32_t value) +{ +	nouveau_bo_wr32(vpe_channel->pushbuf_bo, vpe_channel->dma.cur++, value); +	vpe_channel->dma.free--; +	 +	if (vpe_channel->dma.cur == vpe_channel->dma.max) { +		vpe_channel->dma.cur = 0; +		vpe_channel->dma.free = vpe_channel->dma.max; +	} +} + +static inline void +nouveau_vpe_cmd_align(struct nouveau_vd_vpe_channel *vpe_channel) +{ +	uint32_t nop_count; +	uint32_t cmd_sequence_count; +	int i; +	 +	/* Alignment is needed when ending cmd sequences.*/ +	cmd_sequence_count = vpe_channel->dma.cur - vpe_channel->dma.put; +	nop_count = ALIGN(cmd_sequence_count, NV_VPE_CMD_ALIGNMENT); +	nop_count -= cmd_sequence_count; + +	for (i = 0; i < nop_count; i++) +		nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_NOP << NV_VPE_CMD_TYPE_SHIFT);	 +} + +static inline void +nouveau_vpe_fire(struct nouveau_vd_vpe_channel *vpe_channel, uint64_t delay) +{ +	struct drm_device *dev = vpe_channel->dev; +	uint32_t put; +	 +	DRM_MEMORYBARRIER(); +	 +	put = (vpe_channel->dma.cur / NV_VPE_CMD_ALIGNMENT) * NV_VPE_CMD_ALIGNMENT; +	 +	nouveau_bo_rd32(vpe_channel->pushbuf_bo, put); +	 +	nv_wr32(dev, NV_VPE_MPEG2_USER_PUT, put << 2); +	 +	vpe_channel->dma.put = put; +	 +	if (delay) +		DRM_UDELAY(delay); +} + 
+static uint32_t +nouveau_vpe_channel_read_get(struct nouveau_vd_vpe_channel *vpe_channel) +{ +	struct drm_device *dev = vpe_channel->dev; +	 +	return nv_rd32(dev, NV_VPE_MPEG2_USER_GET) >> 2; +} + +static int +nouveau_vpe_channel_wait(struct nouveau_vd_vpe_channel *vpe_channel, uint32_t put) +{ +	uint32_t get; +	uint32_t prev_get = 0; +	bool is_beg = (put == 0) || (vpe_channel->dma.put == 0); +	uint32_t cnt = 0; +	 +	get = prev_get = nouveau_vpe_channel_read_get(vpe_channel); +	 +	while ( (!is_beg && (get < put) ) || +		    (is_beg && (get != 0) ) ) { +		     +		/* reset counter as long as GET is still advancing, this is +		 * to avoid misdetecting a GPU lockup if the GPU happens to +		 * just be processing an operation that takes a long time +		 */ +		get = nouveau_vpe_channel_read_get(vpe_channel); +		if (get != prev_get) { +			prev_get = get; +			cnt = 0; +		} + +		if ((++cnt & 0xff) == 0) { +			DRM_UDELAY(1); +			if (cnt > 100000) { +				NV_ERROR(vpe_channel->dev, "nouveau_vpe_channel_wait - lockup. 
cur = 0x%08X, put = 0x%08X, get = 0x%08X, put.seq = %u, get.seq = %u, ec1 = 0x%08X, ec2 = 0x%08X, es = 0x%08X.\n",  +					vpe_channel->dma.cur, put, nouveau_vpe_channel_read_get(vpe_channel), vpe_channel->dma.sequence,  +					nv_rd32(vpe_channel->dev, NV_VPE_MPEG2_SEQUENCE_GET), +					nv_rd32(vpe_channel->dev, NV_VPE_MPEG2_ENGINE_CONFIG_1),  +					nv_rd32(vpe_channel->dev, NV_VPE_MPEG2_ENGINE_CONFIG_2), +					nv_rd32(vpe_channel->dev, NV_VPE_MPEG2_ENGINE_STATUS)); +				return -EBUSY; +			} +		} +	} + +	return 0; +} + +static void +nouveau_vpe_cmd_end_sequence_header(struct nouveau_vd_vpe_channel *vpe_channel) +{	 +	nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_END_SEQUENCE << NV_VPE_CMD_TYPE_SHIFT +					| NV_VPE_CMD_SEQUENCE << 24); + +	nouveau_vpe_cmd_write(vpe_channel, ++vpe_channel->dma.sequence); +} + +static void +nouveau_vpe_cmd_end_sequence_trailer(struct nouveau_vd_vpe_channel *vpe_channel) +{	 +	nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_END_SEQUENCE << NV_VPE_CMD_TYPE_SHIFT); +} + +static void +nouveau_vpe_cmd_end_sequence_finish(struct nouveau_vd_vpe_channel *vpe_channel) +{	 +	nouveau_vpe_cmd_align(vpe_channel); +	nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_NORMAL); +} + +#ifndef NOUVEAU_VPE_VALIDATE_USER_CMDS +static void +_OUT_RINGp(struct nouveau_vd_vpe_channel *chan, const void *data, unsigned nr_dwords) +{ +	bool is_iomem; +	u32 *mem = ttm_kmap_obj_virtual(&chan->pushbuf_bo->kmap, &is_iomem); +	mem = &mem[chan->dma.cur]; +	if (is_iomem) +		memcpy_toio((void __force __iomem *)mem, data, nr_dwords * 4); +	else +		memcpy(mem, data, nr_dwords * 4); +	chan->dma.cur += nr_dwords; +} +#endif + +static int +nouveau_vpe_cmd_write_user_batch(struct nouveau_vd_vpe_channel *chan,  +									const void *data, unsigned nr_dwords) +{ +#ifdef NOUVEAU_VPE_VALIDATE_USER_CMDS +	bool is_iomem; +	u32 *mem = ttm_kmap_obj_virtual(&chan->pushbuf_bo->kmap, &is_iomem); +	u32 *user_data = (u32*) data; +	uint32_t val; +	int i; +	bool in_mb_db = false; +	bool at_end_mb_db 
= false; +	 +	mem = &mem[chan->dma.cur]; +	 +	for (i = 0; i < nr_dwords; i++) { +		val = user_data[i]; +		 +		if (in_mb_db) { +			if (at_end_mb_db) { +			  if (val == (NV_VPE_CMD_DCT_SEPARATOR << NV_VPE_CMD_TYPE_SHIFT)) +				at_end_mb_db = false; +			  else +				in_mb_db = false; +			} +			else if (val & NV_VPE_DCT_BLOCK_TERMINATOR)  +					at_end_mb_db = true; +		} +		if (!in_mb_db) { +			switch (val & 0xF0000000) { +				case NV_VPE_CMD_DCT_SEPARATOR << NV_VPE_CMD_TYPE_SHIFT: +					in_mb_db = true; +					at_end_mb_db = false; +					break; +				case NV_VPE_CMD_DCT_CHROMA_HEADER << NV_VPE_CMD_TYPE_SHIFT: +				case NV_VPE_CMD_DCT_LUMA_HEADER << NV_VPE_CMD_TYPE_SHIFT: +				case NV_VPE_CMD_DCT_COORDINATE << NV_VPE_CMD_TYPE_SHIFT: +				case NV_VPE_CMD_CHROMA_MOTION_VECTOR_HEADER << NV_VPE_CMD_TYPE_SHIFT: +				case NV_VPE_CMD_LUMA_MOTION_VECTOR_HEADER << NV_VPE_CMD_TYPE_SHIFT: +				case NV_VPE_CMD_MOTION_VECTOR << NV_VPE_CMD_TYPE_SHIFT: +				case NV_VPE_CMD_NOP << NV_VPE_CMD_TYPE_SHIFT: +					break; +				default: +					NV_ERROR(chan->dev, "vpe - invalid cmd 0x%08X detected. 
Aborting cmd sequence.\n",  +							val); +				return -EINVAL; +			} +		} +		 +		/* Always iomem/vram for vpe.*/ +		iowrite32_native(val, (void __force __iomem *)&mem[i]); +	} +	 +	chan->dma.cur += nr_dwords; +#else +	_OUT_RINGp(chan, data, nr_dwords); +#endif +	 +	return 0; +} + +static bool +nouveau_vpe_validate_surface(struct nouveau_vd_vpe_channel *vpe_channel,  +							    uint32_t handle,  +							    struct nouveau_bo *target_nvbo) +{ +	struct drm_device *dev = vpe_channel->dev; +	struct drm_gem_object *gem; +	struct nouveau_bo *nvbo; +	bool result; +	 +	gem = drm_gem_object_lookup(dev, vpe_channel->file_priv, handle); +	if (unlikely(!gem)) { +		result = false; +		NV_ERROR(dev, "nouveau_vpe_validate_gem_handle - Unknown handle 0x%08X.\n", handle); +		goto out; +	} +	nvbo = nouveau_gem_object(gem); +	if (unlikely(!nvbo || (nvbo != target_nvbo))) { +		result = false; +		NV_ERROR(dev, "nouveau_vpe_validate_gem_handle - Unknown bo 0x%08X.\n", handle); +		goto out; +	} +	 +	result = true; +		 +out: + +	mutex_lock(&dev->struct_mutex); +	drm_gem_object_unreference(gem); +	mutex_unlock(&dev->struct_mutex); +	 +	return result; +} + +static int +nouveau_vpe_pin_surface(struct nouveau_vd_vpe_channel *vpe_channel, uint32_t handle, +                        uint32_t required_size, struct nouveau_bo **pnvbo) +{ +	struct drm_device *dev = vpe_channel->dev; +	struct drm_gem_object *gem; +	struct nouveau_bo *nvbo; +	uint32_t mem_type; +	unsigned long size; +	int ret; +	 +	gem = drm_gem_object_lookup(dev, vpe_channel->file_priv, handle); +	if (!gem) { +		NV_ERROR(dev, "nouveau_vpe_pin_surface - Unknown handle 0x%08X.\n", handle); +		return -EINVAL; +	} +	nvbo = nouveau_gem_object(gem); +	if (!nvbo) { +		ret = -EINVAL; +		NV_ERROR(dev, "nouveau_vpe_pin_surface - Unknown bo 0x%08X.\n", handle); +		goto out; +	} +	ret = ttm_bo_reserve(&nvbo->bo, false, false, false, 0); +	if (ret) +		goto out; +		 +	mem_type = nvbo->bo.mem.mem_type; +	size = nvbo->bo.mem.size; +	 +	
ttm_bo_unreserve(&nvbo->bo); +		 +	if (mem_type != TTM_PL_VRAM) { +		ret = -EINVAL; +		NV_ERROR(dev, "nouveau_vpe_pin_surface - bo must be in vram.\n"); +		goto out; +	} +	if (size < required_size) { +		ret = -EINVAL; +		NV_ERROR(dev, "nouveau_vpe_pin_surface - bo 0x%08X has size %lu, required %u.\n", handle, +			size, required_size); +		goto out; +	} +	ret = nouveau_bo_pin(nvbo, TTM_PL_FLAG_VRAM); +	if (ret) { +		NV_ERROR(dev, "nouveau_vpe_pin_surface - Could not pin handle 0x%08X.\n", handle); +		goto out; +	} +	 +	*pnvbo = nvbo; +	ret = 0; +	 +out: +	 +	mutex_lock(&dev->struct_mutex); +	drm_gem_object_unreference(gem); +	mutex_unlock(&dev->struct_mutex); +	 +	return ret; +} + +static void +nouveau_vpe_unpin_surface(struct nouveau_vd_vpe_channel *vpe_channel, struct nouveau_bo *nvbo) +{ +	if (nvbo && nvbo->pin_refcnt) +		nouveau_bo_unpin(nvbo); +} + +static void +nouveau_vpe_reset_pushbuf_to_start(struct nouveau_vd_vpe_channel *vpe_channel) +{ +	int i; +	uint32_t nop_count; +		 +	if (vpe_channel->dma.cur) { +		/* Just write nops till the end since alignment is a non-issue +		 * here.*/ +		nop_count = vpe_channel->dma.max - vpe_channel->dma.cur; +		 +		for (i = 0; i < nop_count; i++) +			nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_NOP << NV_VPE_CMD_TYPE_SHIFT); +	} +	 +	nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_END); +} + +static int  +nouveau_vpe_channel_pushbuf_alloc(struct nouveau_vd_vpe_channel *vpe_channel) +{ +	struct drm_device *dev = vpe_channel->dev; +	struct nouveau_bo *pushbuf_bo; +	int ret; +	uint32_t flags; +	 +	if (0)  +	/*dev_priv->gart_info.type == NOUVEAU_GART_AGP) +	 * agp init is broken right now it seems.*/ +		flags = TTM_PL_FLAG_TT; +	else +		flags = TTM_PL_FLAG_VRAM; + +	ret = nouveau_gem_new(dev, NULL, NV_VPE_PUSHBUFFER_SIZE, 0, +						 flags, 0, 0x0000, false, true, &pushbuf_bo); +	if (ret) +		return ret; + +	ret = nouveau_bo_pin(pushbuf_bo, flags); +	if (ret) +		goto out_err; + +	ret = nouveau_bo_map(pushbuf_bo); +	if (ret) +		goto 
out_err; +	 +	vpe_channel->pushbuf_bo = pushbuf_bo; +	vpe_channel->dma.max  = vpe_channel->pushbuf_bo->bo.mem.size >> 2; +	vpe_channel->dma.free = vpe_channel->dma.max; +		 +out_err: +	if (ret) { +		mutex_lock(&dev->struct_mutex); +		drm_gem_object_unreference(pushbuf_bo->gem); +		mutex_unlock(&dev->struct_mutex); +	} +	 +	return ret; +} + +static int  +nouveau_vpe_channel_hw_init(struct nouveau_vd_vpe_channel *vpe_channel) +{ +	uint32_t value; +	struct drm_device *dev = vpe_channel->dev; +	struct drm_nouveau_private *dev_priv = dev->dev_private; +	uint32_t pushbuf_offset = 0; + +	/* Turn off the mpeg2 decoder.*/ +	nv_wr32(dev, NV_VPE_MPEG2_USER_CONFIG,  +		NV_VPE_MPEG2_USER_NOT_PRESENT); +	nv_wr32(dev, NV_VPE_MPEG2_ENGINE_CONTROL, NV_VPE_MPEG2_ENGINE_STOP); +	nv_wr32(dev, NV_VPE_MPEG2_USER_PUT, 0); +	nv_wr32(dev, NV_VPE_MPEG2_USER_OFFSET, 0); +	nv_wr32(dev, NV_VPE_MPEG2_USER_SIZE, 0); +	nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_1, 0); +	nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_2, 0); +	nv_rd32(dev, NV_VPE_MPEG2_ENGINE_CONTROL); +	 +	/* Pause a tiny bit to let the hardware reset.   +	 * This might be needed.*/ +	DRM_UDELAY(100); +	 +	nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_1, 0x01010000); +	nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_2, 0x01010000); +	nv_wr32(dev, NV_VPE_MPEG2_UNKNOWN_SETUP_3, 0x100); +	 +	/* Some type of mpeg2 engine config. +	 * It seems that the hardware automatically sets this to 0x20. +	 * However, I have an nv4a mmio trace where the nvidia driver +	 * actually writes 0x20.   +	 * Also I have noticed that when the mpeg2 engine hw locks +	 * up after playing video, this register gets reset to 0x1. +	 */ +	if (nv_rd32(dev, NV_VPE_MPEG2_ENGINE_CONFIG_1) != 0x20) +		nv_wr32(dev, NV_VPE_MPEG2_ENGINE_CONFIG_1, 0x20); +	if (nv_rd32(dev, NV_VPE_MPEG2_ENGINE_CONFIG_2) != 0x20) +		nv_wr32(dev, NV_VPE_MPEG2_ENGINE_CONFIG_2, 0x20); + +	/* Make sure the decoder is ready. +	 * So, we check each status register.   +	 * Well, that is what these registers seem to be. 
+	 */ +	value = nv_rd32(dev, NV_VPE_MPEG2_ENGINE_STATUS); + +	/* Is the hw still busy? */ +	if (value & 0x1) +		if (!nouveau_wait_until(dev, 10000000, NV_VPE_MPEG2_ENGINE_STATUS,  +							0x0FFFFFFF, 0)) { +			NV_ERROR(dev, "nouveau_vpe_channel_hw_init - unknown status value of 0x%08X for engine status reg. Must exit.\n",  +					nv_rd32(dev, NV_VPE_MPEG2_ENGINE_STATUS)); +			return -EINVAL; +		} + +	/* Make sure the decoder is ready. */ +	value = nv_rd32(dev, NV_VPE_MPEG2_ENGINE_STATUS_1); + +	/* If we got this value then we might have a problem. */ +	if (value & 0x200) { +		NV_ERROR(dev, "nouveau_vpe_channel_hw_init - unknown status value of 0x%08X for engine status 1 reg. Must exit.\n",  +					value); +		return -EINVAL; +	} + +	/* Is the status reg still busy? */ +	if (value & 0x1) +		if (!nouveau_wait_until(dev, 10000000, NV_VPE_MPEG2_ENGINE_STATUS_1,  +							0x0FFFFFFF, 0)) { +			NV_ERROR(dev, "nouveau_vpe_channel_hw_init - unknown status value of 0x%08X for engine status 1 reg. Must exit.\n",  +					nv_rd32(dev, NV_VPE_MPEG2_ENGINE_STATUS_1)); +			return -EINVAL; +		} + +	/* Reset the mpeg2 pushbuffer/user. */ +	nv_wr32(dev, NV_VPE_MPEG2_ENGINE_CONTROL, NV_VPE_MPEG2_ENGINE_STOP); +	nv_wr32(dev, NV_VPE_MPEG2_USER_OFFSET, 0); +	nv_wr32(dev, NV_VPE_MPEG2_USER_SIZE, 0); + +	/* The setup of the command buffer is different for agp and pci/pcie.  
+	 * NOTE: Agp is not working right now so it is disabled.*/ +	if (vpe_channel->pushbuf_bo->bo.mem.mem_type == TTM_PL_TT) { +		 +		pushbuf_offset = lower_32_bits(dev_priv->gart_info.aper_base) +  +		    lower_32_bits(vpe_channel->pushbuf_bo->bo.offset); +		 +		nv_wr32(dev, NV_VPE_MPEG2_USER_CONFIG, +				NV_VPE_MPEG2_USER_PRESENT | NV_VPE_MPEG2_USER_AGP_OR_PCI); +		/* This needs the agp aperature in the offset.*/ +		nv_wr32(dev, NV_VPE_MPEG2_USER_OFFSET, +				pushbuf_offset); +		nv_wr32(dev, NV_VPE_MPEG2_USER_SIZE, +				vpe_channel->dma.max << 2); +		nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_1, 0x01010000); +		nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_2, 0x01010000); +		nv_wr32(dev, NV_VPE_MPEG2_USER_CONFIG, +				NV_VPE_MPEG2_USER_PRESENT | NV_VPE_MPEG2_USER_AGP_OR_PCI | NV_VPE_MPEG2_USER_AGP_OR_PCI_READY); +	} else { +		/* For pci, only the fb offset is used. +		 * However, have to init the pushbuffer/user using the fb size? not sure here. +		 * This is not related to decoding but strictly for reading from +		 * the pushbuffer/user.  It might be caching related.  +		 * The nv driver uses different values but it looks fb size related. +		 * So, I will go with that for now. 
+		 */ +		pushbuf_offset = lower_32_bits(vpe_channel->pushbuf_bo->bo.offset); +		nv_wr32(dev, NV_VPE_MPEG2_USER_CONFIG,  +				NV_VPE_MPEG2_USER_PRESENT | NV_VPE_MPEG2_USER_VRAM); +		nv_wr32(dev, NV_VPE_MPEG2_USER_OFFSET, 0); +		nv_wr32(dev, NV_VPE_MPEG2_USER_SIZE, dev_priv->fb_available_size); +		nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_1, 0x01010000); +		nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_2, 0x01010000); +		nv_wr32(dev, NV_VPE_MPEG2_USER_CONFIG,  +				NV_VPE_MPEG2_USER_PRESENT | NV_VPE_MPEG2_USER_VRAM); +		nv_wr32(dev, NV_VPE_MPEG2_USER_OFFSET, +				pushbuf_offset); +		nv_wr32(dev, NV_VPE_MPEG2_USER_SIZE, +				vpe_channel->dma.max << 2); +	} + +	/* Start up the mpeg2 engine */ +	nv_wr32(dev, NV_VPE_MPEG2_ENGINE_CONTROL, NV_VPE_MPEG2_ENGINE_STOP); +	nv_wr32(dev, NV_VPE_MPEG2_USER_PUT, 0); +	nv_wr32(dev, NV_VPE_MPEG2_ENGINE_CONTROL, NV_VPE_MPEG2_ENGINE_START); +	nv_rd32(dev, NV_VPE_MPEG2_ENGINE_CONTROL); +	 +	return 0; +} + +static int  +nouveau_vpe_channel_init(struct nouveau_vd_vpe_channel *vpe_channel) +{ +	struct drm_device *dev = vpe_channel->dev; +	int ret; +	int i; +	uint32_t value; +	 +	/* Reset decoder to the initial state.*/ +	nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_CHANNEL << NV_VPE_CMD_TYPE_SHIFT +				| NV_VPE_CMD_INIT_CHANNEL_ACCEL << 24 ); +	nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_CHANNEL << NV_VPE_CMD_TYPE_SHIFT); +	/* NOTE: The surface group info value might be tiling related. 
*/ +	nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_CHANNEL << NV_VPE_CMD_TYPE_SHIFT +			| NV_VPE_CMD_INIT_CHANNEL_SURFACE_GROUP_INFO << 24); +			 +	nouveau_vpe_cmd_end_sequence_header(vpe_channel); +	/* No body/trailer for the init cmd.*/ +	nouveau_vpe_cmd_end_sequence_finish(vpe_channel); +	 +	ret = nouveau_vpe_channel_wait(vpe_channel, vpe_channel->dma.put); +	if (ret) +		return ret; +			 +	/* Clear out all surface references.*/ +	for (i = 0; i < NV_VPE_MAX_SURFACES; i++) { +		 +		nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_SURFACE << NV_VPE_CMD_TYPE_SHIFT +			| NV_VPE_CMD_INIT_SURFACE_LUMA(i)); +		nouveau_vpe_cmd_align(vpe_channel); +		 +		nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_INIT); +		ret = nouveau_vpe_channel_wait(vpe_channel, vpe_channel->dma.put); +		if (ret) +			return ret; +			 +		nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_SURFACE << NV_VPE_CMD_TYPE_SHIFT +			| NV_VPE_CMD_INIT_SURFACE_CHROMA(i)); +		nouveau_vpe_cmd_align(vpe_channel); +		 +		nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_INIT); +		ret = nouveau_vpe_channel_wait(vpe_channel, vpe_channel->dma.put); +		if (ret) +			return ret; +	} +	 +	/* Init the decoder channel.*/ +	nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_CHANNEL << NV_VPE_CMD_TYPE_SHIFT +				    | NV_VPE_CMD_INIT_CHANNEL_ACCEL << 24  +				    /* If IDCT is disabled then only MC is done.*/ +				    | NV_VPE_CMD_INIT_CHANNEL_ACCEL_IDCT); +	nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_CHANNEL << NV_VPE_CMD_TYPE_SHIFT +			| (vpe_channel->width << 12 | vpe_channel->height)); +	/* NOTE: The surface group info value might be tiling related. 
*/ +	nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_CHANNEL << NV_VPE_CMD_TYPE_SHIFT +			| NV_VPE_CMD_INIT_CHANNEL_SURFACE_GROUP_INFO << 24 +			| (ALIGN(vpe_channel->width, 112) / 32)); +			 +	nouveau_vpe_cmd_end_sequence_header(vpe_channel); +	/* No body/trailer for the init cmd.*/ +	nouveau_vpe_cmd_end_sequence_finish(vpe_channel); +	 +	ret = nouveau_vpe_channel_wait(vpe_channel, vpe_channel->dma.put); +	if (ret) +		return ret; +		 +	/* Make sure hardware context is setup correctly */ +	 +	value = nv_rd32(dev, NV_VPE_MPEG2_SURFACE_INFO); +	if (value != ( 0x10000 | (ALIGN(vpe_channel->width, 128) ) ) ) { +		NV_ERROR(dev, "nouveau_vpe_channel_init - channel surface setup wrong for width = %d, height = %d, got = 0x%08X.\n",  +				vpe_channel->width, vpe_channel->height, value); +		return -EINVAL; +	} + +	value = nv_rd32(dev, NV_VPE_MPEG2_CONTEXT_DIMENSIONS); +	if (value != ( ( (vpe_channel->width & 0xFFF) << 16) | (vpe_channel->height & 0xFFF) ) ) { +		NV_ERROR(dev, "nouveau_vpe_channel_init - channel dimensions wrong for width = %d, height = %d, got = 0x%08X.\n",  +				vpe_channel->width, vpe_channel->height, value); +		return -EINVAL; +	} +	 +	return 0; +} + +static void  +nouveau_vpe_channel_shutdown(struct nouveau_vd_vpe_channel *vpe_channel) +{ +	nouveau_vpe_cmd_end_sequence_header(vpe_channel); +	/* No body/trailer for the init cmd.*/ +	nouveau_vpe_cmd_end_sequence_finish(vpe_channel); +} + +static void  +nouveau_vpe_channel_hw_shutdown(struct nouveau_vd_vpe_channel *vpe_channel) +{ +	struct drm_device *dev = vpe_channel->dev; +	 +	nouveau_vpe_channel_shutdown(vpe_channel); +	 +	nouveau_vpe_channel_wait(vpe_channel,  vpe_channel->dma.cur); +	 +	/* Just a slight pause. This might not be needed. 
*/ +	DRM_UDELAY(100); +	 +	/* Turn off the mpeg2 decoder.*/ +	nv_wr32(dev, NV_VPE_MPEG2_USER_CONFIG,  +		NV_VPE_MPEG2_USER_NOT_PRESENT); +	nv_wr32(dev, NV_VPE_MPEG2_ENGINE_CONTROL, NV_VPE_MPEG2_ENGINE_STOP); +	nv_wr32(dev, NV_VPE_MPEG2_USER_PUT, 0); +	nv_wr32(dev, NV_VPE_MPEG2_USER_OFFSET, 0); +	nv_wr32(dev, NV_VPE_MPEG2_USER_SIZE, 0); +	nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_1, 0); +	nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_2, 0); +	nv_rd32(dev, NV_VPE_MPEG2_ENGINE_CONTROL); +} + +static int  +nouveau_vpe_channel_alloc(struct drm_device *dev, +				struct drm_nouveau_vd_vpe_channel_alloc *req,  +				struct drm_file *file_priv) +{ +	struct drm_nouveau_private *dev_priv = dev->dev_private; +	struct nouveau_vd_vpe_channel *vpe_channel; +	int ret; +	 +	if (dev_priv->vpe_channel) { +		NV_ERROR(dev, "vpe channel is already in use.\n"); +		return -EPERM;    +	} +	 +	if ( (dev_priv->card_type != NV_40) && +	     (dev_priv->card_type != NV_30) ) { +		NV_ERROR(dev, "vpe is not supported on NV%d.\n",  +			dev_priv->card_type); +		return -EINVAL;    +	} +	 +	if ( (req->width < NV_VPE_MIN_WIDTH) || +	     (req->width > NV_VPE_MAX_WIDTH) || +	     (req->height < NV_VPE_MIN_HEIGHT) || +	     (req->height > NV_VPE_MAX_HEIGHT) ) { +		NV_ERROR(dev, "vpe does not support width = %d, height = %d\n", req->width, +		req->height); +		return -EINVAL; +	} +	 +	vpe_channel = kzalloc(sizeof(*vpe_channel), GFP_KERNEL); +	if (!vpe_channel) +		return -ENOMEM; +		 +	req->width = ALIGN(req->width, 16); +	req->height = ALIGN(req->height, 16);	 +	vpe_channel->dev = dev; +	vpe_channel->width = req->width; +	vpe_channel->height = req->height; +	 +	ret = nouveau_vpe_channel_pushbuf_alloc(vpe_channel); +	if (ret) +		goto out_err; +		 +	ret = nouveau_vpe_channel_hw_init(vpe_channel); +	if (ret) +		goto out_err; +	 +	ret = nouveau_vpe_channel_init(vpe_channel); +	if (ret) +		goto out_err; +		 +	ret = drm_gem_handle_create(file_priv, vpe_channel->pushbuf_bo->gem, +				    &req->pushbuf_handle); +	if (ret) +	
	goto out_err; +			 +	nouveau_debugfs_vpe_channel_init(vpe_channel); +	 +	vpe_channel->file_priv = file_priv; +	dev_priv->vpe_channel = vpe_channel; +	 +	NV_INFO(dev, "intialized vpe channel\n"); +		 +out_err: +	if (ret) +		nouveau_vpe_channel_free(vpe_channel); +			 +	return ret; +} + +void  +nouveau_vpe_channel_free(struct nouveau_vd_vpe_channel *vpe_channel) +{ +	struct drm_device *dev; +	struct drm_nouveau_private *dev_priv; +	struct nouveau_vd_vpe_surface *vpe_surface; +	int i; +	 +	if (!vpe_channel) +		return; +		 +	dev = vpe_channel->dev; +	dev_priv = dev->dev_private; + +	nouveau_vpe_channel_hw_shutdown(vpe_channel); +	 +	nouveau_debugfs_vpe_channel_fini(vpe_channel); +	 +	for (i = 0; i < ARRAY_SIZE(vpe_channel->surface); i++) { +		vpe_surface = &vpe_channel->surface[i]; +		if (vpe_surface->luma_bo) +			nouveau_vpe_unpin_surface(vpe_channel, vpe_surface->luma_bo); +		if (vpe_surface->chroma_bo) +			nouveau_vpe_unpin_surface(vpe_channel, vpe_surface->chroma_bo); +	} +	 +	if (vpe_channel->pushbuf_bo) { +		nouveau_bo_unmap(vpe_channel->pushbuf_bo); +		mutex_lock(&vpe_channel->dev->struct_mutex); +		drm_gem_object_unreference(vpe_channel->pushbuf_bo->gem); +		mutex_unlock(&vpe_channel->dev->struct_mutex); +	} +	 +	NV_INFO(vpe_channel->dev, "shutdown vpe channel\n"); +	 +	dev_priv->vpe_channel = NULL; +	 +	kfree(vpe_channel); +} + +static int +nouveau_vpe_reference_surface(struct nouveau_vd_vpe_channel *vpe_channel,  +						uint32_t surface_index, uint64_t addr_offset, +						bool is_luma) +{ +	struct drm_device *dev = vpe_channel->dev; +	uint32_t value; +	int ret; +	 +	if (vpe_channel->dma.free < 8) +		nouveau_vpe_reset_pushbuf_to_start(vpe_channel); +		 +	nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_SURFACE << NV_VPE_CMD_TYPE_SHIFT +		| (is_luma ? 
NV_VPE_CMD_INIT_SURFACE_LUMA(surface_index) :  +		             NV_VPE_CMD_INIT_SURFACE_CHROMA(surface_index)) +		| NV_VPE_CMD_INIT_SURFACE_OFFSET_DIV(lower_32_bits(addr_offset))); +	nouveau_vpe_cmd_align(vpe_channel); +	 +	if (vpe_channel->dma.free >= NV_VPE_CMD_ALIGNMENT) +		nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_INIT); +	else +		nouveau_vpe_reset_pushbuf_to_start(vpe_channel); +		 +	ret = nouveau_vpe_channel_wait(vpe_channel, vpe_channel->dma.cur); +	if (ret) +		return ret; +		 +	if (is_luma) {	 +		value = nv_rd32(dev, NV_VPE_MPEG2_LUMA_SURFACE_OFFSET_GET(surface_index)); +		if (lower_32_bits(addr_offset) != value) { +			NV_ERROR(dev, "vpe - surface.luma ref is wrong. Expected 0x%08X, Got 0x%08X.\n",  +				lower_32_bits(addr_offset), value); +			return -EINVAL; +		} +	} +	else {	 +		value = nv_rd32(dev, NV_VPE_MPEG2_CHROMA_SURFACE_OFFSET_GET(surface_index)); +		if (lower_32_bits(addr_offset) != value) { +			NV_ERROR(dev, "vpe - surface.chroma ref is wrong. Expected 0x%08X, Got 0x%08X.\n",  +				lower_32_bits(addr_offset), value); +			return -EINVAL; +		} +	} +		 +	return 0; +} + +static int +nouveau_vpe_channel_validate_surfaces(struct nouveau_vd_vpe_channel *vpe_channel, +                        struct drm_nouveau_vd_vpe_surface *surfaces, int nr_surfaces, +                        struct nouveau_vd_vpe_surface **target_vpe_surface) +{ +	struct drm_device *dev = vpe_channel->dev; +	int ret; +	int i; +	struct nouveau_vd_vpe_surface *vpe_surface; +	struct drm_nouveau_vd_vpe_surface *surface; +	uint32_t decoder_surface_size = 0; +		 +	for (i = 0, surface = surfaces; i < nr_surfaces; i++, surface++) { +		if (unlikely(surface->surface_index >= ARRAY_SIZE(vpe_channel->surface))) { +			NV_ERROR(dev, "nouveau_vpe_channel_validate_surfaces - surface_index %d is invalid.\n", surface->surface_index); +			return -EINVAL; +		} + +		vpe_surface = &vpe_channel->surface[surface->surface_index]; +		if (!vpe_surface->luma_bo || +		    
!nouveau_vpe_validate_surface(vpe_channel, surface->luma_handle, vpe_surface->luma_bo)) { +			if (!decoder_surface_size) +				decoder_surface_size = vpe_channel->width * vpe_channel->height; +				 +			if (vpe_surface->luma_bo) { +				nouveau_vpe_unpin_surface(vpe_channel, vpe_surface->luma_bo); +				vpe_surface->luma_bo = NULL; +			} +			 +			ret = nouveau_vpe_pin_surface(vpe_channel, surface->luma_handle, +                        decoder_surface_size, &vpe_surface->luma_bo); +            if (ret) { +				NV_ERROR(dev, "nouveau_vpe_channel_validate_surfaces - could not pin surface_index %d, luma handle 0x%08X, error %d.\n", surface->surface_index, +				surface->luma_handle, ret); +				return ret; +			} +			 +			ret = nouveau_vpe_reference_surface(vpe_channel, surface->surface_index,  +										  vpe_surface->luma_bo->bo.offset, true); +			if (ret) { +				NV_ERROR(dev, "nouveau_vpe_channel_validate_surfaces - could not reference surface_index %d, luma handle 0x%08X, error %d.\n", surface->surface_index, +				surface->luma_handle, ret); +				nouveau_vpe_unpin_surface(vpe_channel, vpe_surface->luma_bo); +				vpe_surface->luma_bo = NULL; +				return ret; +			} +			 +			vpe_surface->dma_sequence = 0; +		} +		if (unlikely(!vpe_surface->chroma_bo) || +		    !nouveau_vpe_validate_surface(vpe_channel, surface->chroma_handle, vpe_surface->chroma_bo) ) { +			 +			if (!decoder_surface_size) +				decoder_surface_size = vpe_channel->width * vpe_channel->height; +				 +			if (vpe_surface->chroma_bo) { +				nouveau_vpe_unpin_surface(vpe_channel, vpe_surface->chroma_bo); +				vpe_surface->chroma_bo = NULL; +			} +			 +			ret = nouveau_vpe_pin_surface(vpe_channel, surface->chroma_handle, +                        decoder_surface_size, &vpe_surface->chroma_bo); +            if (ret) { +				NV_ERROR(dev, "nouveau_vpe_channel_validate_surfaces - could not pin surface_index %d, chroma handle 0x%08X, error %d.\n", surface->surface_index, +				surface->luma_handle, ret); +				return ret; +	
		} +			 +			ret = nouveau_vpe_reference_surface(vpe_channel, surface->surface_index,  +			                                    vpe_surface->chroma_bo->bo.offset, false); +			if (ret) { +				NV_ERROR(dev, "nouveau_vpe_channel_validate_surfaces - could not reference surface_index %d, chroma handle 0x%08X, error %d.\n", surface->surface_index, +				surface->luma_handle, ret); +				nouveau_vpe_unpin_surface(vpe_channel, vpe_surface->chroma_bo); +				vpe_surface->chroma_bo = NULL; +				return ret; +			} +			 +			vpe_surface->dma_sequence = 0; +		} +		 +		/* First surface is considered the target.*/ +		if (i == 0) +			*target_vpe_surface = vpe_surface; +	} +	 +	return 0; +} + +static int  +nouveau_vpe_channel_pushbuf_fire(struct nouveau_vd_vpe_channel *vpe_channel, +				struct drm_nouveau_vd_vpe_pushbuf_fire *req) +{ +	int ret; +	uint32_t *pushbuf = NULL; +	uint32_t *batches = NULL; +	struct drm_nouveau_vd_vpe_surface *surfaces = NULL; +	struct nouveau_vd_vpe_surface *vpe_surface = NULL; +	int i; +	uint32_t offset = 0; +	uint32_t batch_size; +	bool is_end_sequence = req->flags & NOUVEAU_VD_VPE_PUSHBUF_FIRE_FLAG_END_SEQUENCE; +	bool is_update_dma_pos = req->flags & NOUVEAU_VD_VPE_PUSHBUF_FIRE_FLAG_UPDATE_DMA_POS; +	bool do_fire_batch; +	 +	if (req->nr_surfaces) { +		surfaces = _u_memcpya(req->surfaces, req->nr_surfaces, sizeof(*surfaces)); +		if (unlikely(IS_ERR(surfaces))) { +			ret = PTR_ERR(surfaces); +			goto out; +		} +	} +	 +	if (req->nr_dwords) { +		pushbuf = _u_memcpya(req->dwords, req->nr_dwords, sizeof(uint32_t)); +		if (unlikely(IS_ERR(pushbuf))) { +			ret = PTR_ERR(pushbuf); +			goto out; +		} +	} +	 +	if (req->nr_batches) { +		batches = _u_memcpya(req->batches, req->nr_batches, sizeof(uint32_t)); +		if (unlikely(IS_ERR(batches))) { +			ret = PTR_ERR(batches); +			goto out; +		} +	} +	 +	if (req->nr_surfaces) { +		ret = nouveau_vpe_channel_validate_surfaces(vpe_channel, +										surfaces, req->nr_surfaces,  +										&vpe_surface); +		if (unlikely(ret)) +			
goto out; +	} +	 +	if (is_update_dma_pos) { +		if (req->dma_cur >= vpe_channel->dma.max) { +			ret = -EINVAL; +		    goto out; +		} +		vpe_channel->dma.cur = req->dma_cur; +		vpe_channel->dma.free = vpe_channel->dma.max - vpe_channel->dma.cur; +		if (!is_end_sequence) +			nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_NORMAL); +	} +	 +	for (i = 0; i < req->nr_batches; i++) { +		batch_size = batches[i]; +		 +		do_fire_batch = !(batch_size & NOUVEAU_VD_VPE_PUSHBUF_FIRE_BATCH_DO_NOT_FIRE); +		 +		batch_size &= 0xFFFF; +		 +		if (unlikely(!batch_size)) { +			ret = -EINVAL; +			goto out; +		} +		 +		if (unlikely((batch_size + offset) > req->nr_dwords)) { +			ret = -EINVAL; +			goto out; +		} + +		if (batch_size > vpe_channel->dma.free) +			nouveau_vpe_reset_pushbuf_to_start(vpe_channel); +		 +		ret = nouveau_vpe_cmd_write_user_batch(vpe_channel, (const void *)((uint64_t)pushbuf + (offset << 2)), batch_size); +		if (ret) +			goto out; +		 +		offset += batch_size; +		vpe_channel->dma.free -= batch_size; +		 +		if (!vpe_channel->dma.free) { +			vpe_channel->dma.cur = 0; +			vpe_channel->dma.free = vpe_channel->dma.max; +			nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_END); +		} +		 +		if (do_fire_batch) +			nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_NORMAL); +	} +	 +	if (req->nr_dwords) { +		if (vpe_channel->dma.free < NV_VPE_MAX_MB)  +			nouveau_vpe_reset_pushbuf_to_start(vpe_channel); +	} + +	if (is_end_sequence) { +		if (vpe_channel->dma.free < NV_VPE_CMD_ALIGNMENT) +			nouveau_vpe_reset_pushbuf_to_start(vpe_channel); +		nouveau_vpe_cmd_end_sequence_header(vpe_channel); +		nouveau_vpe_cmd_end_sequence_trailer(vpe_channel); +		nouveau_vpe_cmd_end_sequence_finish(vpe_channel); +		 +		if (vpe_surface)  +			vpe_surface->dma_sequence = vpe_channel->dma.sequence; +	} +	 +	req->dma_free = vpe_channel->dma.free; +	req->dma_cur = vpe_channel->dma.cur; +	ret = 0; +out: +	if (!IS_ERR(surfaces) && surfaces) +		kfree(surfaces); +	if (!IS_ERR(batches) && batches) +		
kfree(batches); +	if (!IS_ERR(pushbuf) && pushbuf) +		kfree(pushbuf); +		 +	return ret; +} + +static int  +nouveau_vpe_surface_query(struct nouveau_vd_vpe_channel *vpe_channel, +				struct drm_nouveau_vd_vpe_surface_query *req) +{ +	struct drm_device *dev = vpe_channel->dev; +	struct nouveau_vd_vpe_surface *vpe_surface; +	uint32_t i; +	uint32_t value; +	 +	if (unlikely(req->surface_index >= ARRAY_SIZE(vpe_channel->surface))) { +		NV_ERROR(dev, "nouveau_vpe_surface_query - invalid surface index %d.\n",  +			req->surface_index); +		return -EINVAL;  +	} +	 +	req->is_busy = 0; +	 +	vpe_surface = &vpe_channel->surface[req->surface_index]; +	 +	/* This is set when cmds are being written for the target surface.*/ +	if (vpe_surface->dma_sequence) { +		/* Read the current sequence and see if any surfaces have finished rendering.*/ +		value = nv_rd32(dev, NV_VPE_MPEG2_SEQUENCE_GET); +		for (i = 0; i < ARRAY_SIZE(vpe_channel->surface); i++) { +			if (vpe_channel->surface[i].luma_bo ||  +			    vpe_channel->surface[i].chroma_bo) { +				if (value >= vpe_channel->surface[i].dma_sequence) +					vpe_channel->surface[i].dma_sequence = 0; +				else if (i == req->surface_index) { +					req->is_busy = 1; +				} +			} +		} +	} +	 +	return 0; +} + +/* IOCtls.*/ + +int +nouveau_vd_vpe_ioctl_channel_alloc(struct drm_device *dev, void *data, +				struct drm_file *file_priv) +{ +	 +	struct drm_nouveau_vd_vpe_channel_alloc *req = data; + +	NOUVEAU_CHECK_INITIALISED_WITH_RETURN; +		 +	return nouveau_vpe_channel_alloc(dev, req, file_priv); +} + +int +nouveau_vd_vpe_ioctl_channel_free(struct drm_device *dev, void *data, +				struct drm_file *file_priv) +{ +	struct nouveau_vd_vpe_channel *vpe_channel; +	 +	NOUVEAU_CHECK_INITIALISED_WITH_RETURN; +	 +	NOUVEAU_GET_VPE_CHANNEL_WITH_RETURN(file_priv, vpe_channel); +	 +	nouveau_vpe_channel_free(vpe_channel); +			 +	return 0; +} + +int nouveau_vd_vpe_ioctl_pushbuf_fire(struct drm_device *dev, void *data, +				  struct drm_file *file_priv) +{ +	struct 
nouveau_vd_vpe_channel *vpe_channel; +	struct drm_nouveau_vd_vpe_pushbuf_fire *req = data; +	 +	NOUVEAU_CHECK_INITIALISED_WITH_RETURN; +	 +	NOUVEAU_GET_VPE_CHANNEL_WITH_RETURN(file_priv, vpe_channel); +	 +	return nouveau_vpe_channel_pushbuf_fire(vpe_channel, req); +} + +int nouveau_vd_vpe_ioctl_surface_query(struct drm_device *dev, void *data, +				  struct drm_file *file_priv) +{ +	struct nouveau_vd_vpe_channel *vpe_channel; +	struct drm_nouveau_vd_vpe_surface_query *req = data; +	 +	NOUVEAU_CHECK_INITIALISED_WITH_RETURN; +	 +	NOUVEAU_GET_VPE_CHANNEL_WITH_RETURN(file_priv, vpe_channel); +	 +	return nouveau_vpe_surface_query(vpe_channel, req); +} diff --git a/drivers/gpu/drm/nouveau/nouveau_vpe_hw.h b/drivers/gpu/drm/nouveau/nouveau_vpe_hw.h new file mode 100644 index 000000000000..bcd524cd1a28 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_vpe_hw.h @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2010 Jimmy Rentz + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ +  +#ifndef __NOUVEAU_VPE_HW_H__ +#define __NOUVEAU_VPE_HW_H__ + +/* VPE is the video decoder engine that is found in nv30, nv40 and some  + * older hardware (geforce 4 and higher I believe).   + * It contains an mpeg2 decoder with the following properties: + * (-) Decodes at the idct level.  However, I believe older cards only + * support mc level. + * (-) 32x64 to 2032x2032 profiles. + * (-) 4:2:0 chroma sampling. + * (-) Only one set of registers so only one user unless some type of + * context/channel switching is added.*/ + +#define NV_VPE_MAX_CHANNELS           1 +#define NV_VPE_MAX_SURFACES           8 +#define NV_VPE_MIN_WIDTH              32 +#define NV_VPE_MIN_HEIGHT             64 +#define NV_VPE_MAX_WIDTH              2032 +#define NV_VPE_MAX_HEIGHT             2032 +#define NV_VPE_PUSHBUFFER_SIZE        1 * 1024 * 1024 + +#define NV_VPE_CMD_ALIGNMENT         16  + +#define NV_VPE_MAX_MB_BATCH          16 + +#define NV_VPE_MAX_MB_HEADER         20 +#define NV_VPE_MAX_MB_DCT            (33 * 6) +#define NV_VPE_MAX_MB                (NV_VPE_MAX_MB_HEADER + NV_VPE_MAX_MB_DCT) + +#define NV_VPE_CMD_TYPE_SHIFT          28 + +#define NV_VPE_CMD_NOP                0x1 + +#define NV_VPE_CMD_INIT_SURFACE       0x2 +  #define NV_VPE_CMD_INIT_SURFACE_LUMA(index) ( (index * 2) << 24) +  #define NV_VPE_CMD_INIT_SURFACE_CHROMA(index) ( ( (index * 2) + 1) << 24) +  #define NV_VPE_CMD_INIT_SURFACE_OFFSET_DIV(offset) (offset >> 5) +   +#define NV_VPE_CMD_INIT_CHANNEL       0x3 +  #define NV_VPE_CMD_INIT_CHANNEL_SURFACE_GROUP_INFO   0x1 /* ( (width round to 112) / 32 */ +  #define NV_VPE_CMD_INIT_CHANNEL_ACCEL                0x2 /* (0x1 to turn on idct operations). 
*/ +         #define NV_VPE_CMD_INIT_CHANNEL_ACCEL_IDCT 0x1 +     +#define NV_VPE_CMD_DCT_SEPARATOR      0x6 +#define NV_VPE_CMD_END_SEQUENCE	      0x7 + +	#define NV_VPE_CMD_SEQUENCE       0x1 + +/* DCT Blocks */ +#define NV_VPE_CMD_DCT_CHROMA_HEADER  0x8   +#define NV_VPE_CMD_DCT_LUMA_HEADER    0x9 +	/* The block pattern is used for chroma and luma blocks */ +	#define NV_VPE_CMD_DCT_BLOCK_PATTERN(p)  ( (p) << 24) +    /* Not sure what this is for. This is always set in the dct block header */ +	#define NV_VPE_CMD_DCT_BLOCK_UNKNOWN  0x10000 +    /* Target surface index. Is 0 based. */ +	#define NV_VPE_CMD_DCT_BLOCK_TARGET_SURFACE(s)	(s << 20) +    /* If picture element is frame */ +	#define NV_VPE_CMD_PICT_FRAME    0x80000 +    /* If field based encoding and a luma block */ +    #define NV_VPE_CMD_PICT_FRAME_FIELD 0x800000 +    /* If picture element or field encoding is bottom field */ +    #define NV_VD_VPE_CMD_BOTTOM_FIELD      0x20000 +    /* If macroblock x coordinate is even */ +	#define NV_VD_VPE_CMD_EVEN_X_COORD 	    0x8000 +	 +/* Used to terminate a set of dct data blocks.*/ +#define NV_VPE_DCT_BLOCK_TERMINATOR   0x1 +	 +/* Used to designate dct data blocks that are all zero.*/ +#define NV_VPE_DCT_BLOCK_NULL         (0x80040000 | NV_VPE_DCT_BLOCK_TERMINATOR) + +/* Coordinates of dct */ +#define NV_VPE_CMD_DCT_COORDINATE     0xA +    /* Luma */ +	#define NV_VPE_DCT_POINTS_LUMA(x,y,p) ( ( (y * 16 * p) << 12 ) | (x * 16) ) +    /* Chroma */ +	#define NV_VPE_DCT_POINTS_CHROMA(x,y,p) ( ( (y * 8 * p) << 12 ) | (x * 16) ) + + +/* Motion Vectors */ +#define NV_VPE_CMD_LUMA_MOTION_VECTOR_HEADER   0xD +#define NV_VPE_CMD_CHROMA_MOTION_VECTOR_HEADER 0xC +#define NV_VPE_CMD_MOTION_VECTOR               0xE + +    /* Motion Vector Header */ +     +    /* Set if 2 motion vectors exist for this header. 
Otherwise, it is cleared and only 1 exists.*/ +	#define NV_VPE_CMD_MC_MV_COUNT_2               (0x1 << 16) +	 +	/* [Field Picture or Field Motion Only] motion_vertical_field_select is set here.   +	 * This means that the bottom field is selected for the given vertical vector.  +	 * However, dual-prime blocks do not follow this rule. +	 * It is treated speciallly for them.*/ +	#define NV_VPE_CMD_BOTTOM_FIELD_VERTICAL_MOTION_SELECT_FIRST     (0x1 << 17) +	 +	/* [Frame Picture and Frame Motion Type only] */ +	#define NV_VPE_CMD_FRAME_PICT_FRAME_MOTION        (0x1 << 19) +	 +	/* MC prediction surface index. Is 0 based. */ +	#define NV_VPE_CMD_PREDICTION_SURFACE(s) 		    (s << 20) +	 +	/* Set if this is a second motion vector. Otherwise, the first one is assumed.*/ +	#define NV_VPE_CMD_MOTION_VECTOR_TYPE_SECOND      (0x1 << 23) +	 +	/* [Frame Picture and Frame Motion Type OR Field Picture only]*/ +	#define NV_VPE_CMD_FRAME_FRAME_PICT_OR_FIELD      (0x1 << 24) +	 +	/* If Vertical Motion Vector is odd then set. This is before any operations are done. */ +	#define NV_VPE_CMD_ODD_VERTICAL_MOTION_VECTOR     (0x1 << 25) +	 +	/* If Horizontal Motion Vector is odd then set. This is before any operations are done. */ +	#define NV_VPE_CMD_ODD_HORIZONTAL_MOTION_VECTOR   (0x1 << 26) +	 +	/* If set then the motion vectors are backward.  Otherwise, they are forward.*/ +	#define NV_VPE_CMD_MOTION_VECTOR_BACKWARD         (0x1 << 27) +	 +	/* Motion Vectors. This is the equation used for each motion vector. +	 * d is only used as a second vector displacement in a couple of cases. 
+	 */ +	#define NV_VPE_MOTION_VECTOR_VERTICAL(y, c, v, q, d)          ( ( (y * c) + (v / q) + d) << 12) +	#define NV_VPE_MOTION_VECTOR_HORIZONTAL(x, c, v, q, d)        ( (x * c) + (v / q) + d) + +#endif diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h index a6a9f4af5ebd..3e61fd3a67d7 100644 --- a/include/drm/nouveau_drm.h +++ b/include/drm/nouveau_drm.h @@ -183,6 +183,52 @@ enum nouveau_bus_type {  struct drm_nouveau_sarea {  }; +/* VPE Supports mpeg2 only.*/ +struct drm_nouveau_vd_vpe_channel_alloc { +	uint32_t width; +	uint32_t height; +	/* Used for user pushbuf access. +	 * mmio access is not allowed so you still need to fire as normal.*/ +	uint32_t pushbuf_handle; +}; + +struct drm_nouveau_vd_vpe_channel_free { +}; + +#define NOUVEAU_VD_VPE_PUSHBUF_FIRE_FLAG_END_SEQUENCE   0x00000001 +#define NOUVEAU_VD_VPE_PUSHBUF_FIRE_FLAG_UPDATE_DMA_POS 0x00000002 +/* structure for surface.*/ +struct drm_nouveau_vd_vpe_surface { +	uint32_t luma_handle; +	uint32_t chroma_handle; +	uint32_t surface_index; +}; + +/* This flag lets you turn off firing for a specific batch.  + * This is needed in some cases to avoid locking up the decoder.*/ +#define NOUVEAU_VD_VPE_PUSHBUF_FIRE_BATCH_DO_NOT_FIRE  0x10000000 +struct drm_nouveau_vd_vpe_pushbuf_fire { +	/* [in] */ +	uint32_t nr_dwords; +	uint64_t dwords; +	uint32_t nr_batches; +	uint64_t batches; +	/* Surface[0] is always the target.*/ +	uint32_t nr_surfaces; +	uint64_t surfaces; +	uint32_t flags; +	/* Needed when writing to the hw pushbuf from user space. 
+	 * This also will perform a fire.*/ +	uint32_t dma_cur; +	/* [out] */ +	uint32_t dma_free; +}; + +struct drm_nouveau_vd_vpe_surface_query { +	uint32_t surface_index; +	uint32_t is_busy; +}; +  #define DRM_NOUVEAU_GETPARAM           0x00  #define DRM_NOUVEAU_SETPARAM           0x01  #define DRM_NOUVEAU_CHANNEL_ALLOC      0x02 @@ -195,5 +241,9 @@ struct drm_nouveau_sarea {  #define DRM_NOUVEAU_GEM_CPU_PREP       0x42  #define DRM_NOUVEAU_GEM_CPU_FINI       0x43  #define DRM_NOUVEAU_GEM_INFO           0x44 +#define DRM_NOUVEAU_VD_VPE_CHANNEL_ALLOC  0x49 +#define DRM_NOUVEAU_VD_VPE_CHANNEL_FREE   0x50 +#define DRM_NOUVEAU_VD_VPE_PUSHBUF_FIRE   0x51 +#define DRM_NOUVEAU_VD_VPE_SURFACE_QUERY  0x52  #endif /* __NOUVEAU_DRM_H__ */ | 
