57 files changed, 12772 insertions, 5284 deletions
diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile
index f223b2d9228..1f286776b5f 100644
--- a/src/mesa/drivers/dri/radeon/Makefile
+++ b/src/mesa/drivers/dri/radeon/Makefile
@@ -4,31 +4,53 @@
 TOP = ../../../../..
 include $(TOP)/configs/current
 
+CFLAGS += $(RADEON_CFLAGS)
+
 LIBNAME = radeon_dri.so
 
 MINIGLX_SOURCES = server/radeon_dri.c 
 
+ifeq ($(RADEON_LDFLAGS),)
+CS_SOURCES = radeon_cs_space_drm.c
+endif
+
+RADEON_COMMON_SOURCES = \
+	radeon_bo_legacy.c \
+	radeon_common_context.c \
+	radeon_common.c \
+	radeon_cs_legacy.c \
+	radeon_dma.c \
+	radeon_debug.c \
+	radeon_fbo.c \
+	radeon_lock.c \
+	radeon_mipmap_tree.c \
+	radeon_queryobj.c \
+	radeon_span.c \
+	radeon_texture.c
+
 DRIVER_SOURCES = \
 	radeon_context.c \
 	radeon_ioctl.c \
-	radeon_lock.c \
 	radeon_screen.c \
 	radeon_state.c \
 	radeon_state_init.c \
 	radeon_tex.c \
-	radeon_texmem.c \
 	radeon_texstate.c \
 	radeon_tcl.c \
 	radeon_swtcl.c \
-	radeon_span.c \
 	radeon_maos.c \
-	radeon_sanity.c 
+	radeon_sanity.c \
+	$(RADEON_COMMON_SOURCES)
 
 C_SOURCES = \
 	$(COMMON_SOURCES) \
-	$(DRIVER_SOURCES) 
+	$(DRIVER_SOURCES) \
+	$(CS_SOURCES)
+
+DRIVER_DEFINES = -DRADEON_COMMON=0 \
+				 -Wall
 
-DRIVER_DEFINES = -DRADEON_COMMON=0
+DRI_LIB_DEPS += $(RADEON_LDFLAGS)
 
 X86_SOURCES = 
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h
new file mode 100644
index 00000000000..71413716333
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h
@@ -0,0 +1,219 @@
+/* 
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_BO_H
+#define RADEON_BO_H
+
+#include <stdio.h>
+#include <stdint.h>
+//#include "radeon_track.h"
+
+/* bo object */
+#define RADEON_BO_FLAGS_MACRO_TILE  1
+#define RADEON_BO_FLAGS_MICRO_TILE  2
+
+struct radeon_bo_manager;
+
+struct radeon_bo {
+    uint32_t                    alignment;
+    uint32_t                    handle;
+    uint32_t                    size;
+    uint32_t                    domains;
+    uint32_t                    flags;
+    unsigned                    cref;
+#ifdef RADEON_BO_TRACK
+    struct radeon_track         *track;
+#endif
+    void                        *ptr;
+    struct radeon_bo_manager    *bom;
+    uint32_t                    space_accounted;
+};
+
+/* bo functions */
+struct radeon_bo_funcs {
+    struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom,
+                                 uint32_t handle,
+                                 uint32_t size,
+                                 uint32_t alignment,
+                                 uint32_t domains,
+                                 uint32_t flags);
+    void (*bo_ref)(struct radeon_bo *bo);
+    struct radeon_bo *(*bo_unref)(struct radeon_bo *bo);
+    int (*bo_map)(struct radeon_bo *bo, int write);
+    int (*bo_unmap)(struct radeon_bo *bo);
+    int (*bo_wait)(struct radeon_bo *bo);
+    int (*bo_is_static)(struct radeon_bo *bo);
+    int (*bo_set_tiling)(struct radeon_bo *bo, uint32_t tiling_flags,
+			  uint32_t pitch);
+    int (*bo_get_tiling)(struct radeon_bo *bo, uint32_t *tiling_flags,
+			  uint32_t *pitch);
+    int (*bo_is_busy)(struct radeon_bo *bo, uint32_t *domain);
+};
+
+struct radeon_bo_manager {
+    struct radeon_bo_funcs  *funcs;
+    int                     fd;
+
+#ifdef RADEON_BO_TRACK
+    struct radeon_tracker   tracker;
+#endif
+};
+    
+static inline void _radeon_bo_debug(struct radeon_bo *bo,
+                                    const char *op,
+                                    const char *file,
+                                    const char *func,
+                                    int line)
+{
+    fprintf(stderr, "%s %p 0x%08X 0x%08X 0x%08X [%s %s %d]\n",
+            op, bo, bo->handle, bo->size, bo->cref, file, func, line);
+}
+
+static inline struct radeon_bo *_radeon_bo_open(struct radeon_bo_manager *bom,
+                                                uint32_t handle,
+                                                uint32_t size,
+                                                uint32_t alignment,
+                                                uint32_t domains,
+                                                uint32_t flags,
+                                                const char *file,
+                                                const char *func,
+                                                int line)
+{
+    struct radeon_bo *bo;
+
+    bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags);
+
+#ifdef RADEON_BO_TRACK
+    if (bo) {
+        bo->track = radeon_tracker_add_track(&bom->tracker, bo->handle);
+        radeon_track_add_event(bo->track, file, func, "open", line);
+    }
+#endif
+    return bo;
+}
+
+static inline void _radeon_bo_ref(struct radeon_bo *bo,
+                                  const char *file,
+                                  const char *func,
+                                  int line)
+{
+    bo->cref++;
+#ifdef RADEON_BO_TRACK
+    radeon_track_add_event(bo->track, file, func, "ref", line); 
+#endif
+    bo->bom->funcs->bo_ref(bo);
+}
+
+static inline struct radeon_bo *_radeon_bo_unref(struct radeon_bo *bo,
+                                                 const char *file,
+                                                 const char *func,
+                                                 int line)
+{
+    bo->cref--;
+#ifdef RADEON_BO_TRACK
+    radeon_track_add_event(bo->track, file, func, "unref", line);
+    if (bo->cref <= 0) {
+        radeon_tracker_remove_track(&bo->bom->tracker, bo->track);
+        bo->track = NULL;
+    }
+#endif
+    return bo->bom->funcs->bo_unref(bo);
+}
+
+static inline int _radeon_bo_map(struct radeon_bo *bo,
+                                 int write,
+                                 const char *file,
+                                 const char *func,
+                                 int line)
+{
+    return bo->bom->funcs->bo_map(bo, write);
+}
+
+static inline int _radeon_bo_unmap(struct radeon_bo *bo,
+                                   const char *file,
+                                   const char *func,
+                                   int line)
+{
+    return bo->bom->funcs->bo_unmap(bo);
+}
+
+static inline int _radeon_bo_wait(struct radeon_bo *bo,
+                                  const char *file,
+                                  const char *func,
+                                  int line)
+{
+    return bo->bom->funcs->bo_wait(bo);
+}
+
+static inline int _radeon_bo_is_busy(struct radeon_bo *bo,
+				     uint32_t *domain,
+                                     const char *file,
+                                     const char *func,
+                                     int line)
+{
+    return bo->bom->funcs->bo_is_busy(bo, domain);
+}
+
+static inline int radeon_bo_set_tiling(struct radeon_bo *bo,
+				       uint32_t tiling_flags, uint32_t pitch)
+{
+    return bo->bom->funcs->bo_set_tiling(bo, tiling_flags, pitch);
+}
+
+static inline int radeon_bo_get_tiling(struct radeon_bo *bo,
+				       uint32_t *tiling_flags, uint32_t *pitch)
+{
+    return bo->bom->funcs->bo_get_tiling(bo, tiling_flags, pitch);
+}
+
+static inline int radeon_bo_is_static(struct radeon_bo *bo)
+{
+	if (bo->bom->funcs->bo_is_static)
+		return bo->bom->funcs->bo_is_static(bo);
+	return 0;
+}
+
+#define radeon_bo_open(bom, h, s, a, d, f)\
+    _radeon_bo_open(bom, h, s, a, d, f, __FILE__, __FUNCTION__, __LINE__)
+#define radeon_bo_ref(bo)\
+    _radeon_bo_ref(bo, __FILE__, __FUNCTION__, __LINE__)
+#define radeon_bo_unref(bo)\
+    _radeon_bo_unref(bo, __FILE__, __FUNCTION__, __LINE__)
+#define radeon_bo_map(bo, w)\
+    _radeon_bo_map(bo, w, __FILE__, __FUNCTION__, __LINE__)
+#define radeon_bo_unmap(bo)\
+    _radeon_bo_unmap(bo, __FILE__, __FUNCTION__, __LINE__)
+#define radeon_bo_debug(bo, opcode)\
+    _radeon_bo_debug(bo, opcode, __FILE__, __FUNCTION__, __LINE__)
+#define radeon_bo_wait(bo) \
+    _radeon_bo_wait(bo, __FILE__, __func__, __LINE__)
+#define radeon_bo_is_busy(bo, domain) \
+    _radeon_bo_is_busy(bo, domain, __FILE__, __func__, __LINE__)
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c
new file mode 100644
index 00000000000..3e7547d2f9d
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c
@@ -0,0 +1,926 @@
+/* 
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Dave Airlie
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Aapo Tahkola <aet@rasterburn.org>
+ *      Nicolai Haehnle <prefect_@gmx.net>
+ *      Dave Airlie
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include "xf86drm.h"
+#include "texmem.h"
+#include "main/simple_list.h"
+
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon_common.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_macros.h"
+
+/* no seriously texmem.c is this screwed up */
+struct bo_legacy_texture_object {
+    driTextureObject    base;
+    struct bo_legacy *parent;
+};
+
+struct bo_legacy {
+    struct radeon_bo    base;
+    int                 map_count;
+    uint32_t            pending;
+    int                 is_pending;
+    int                 static_bo;
+    uint32_t            offset;
+    struct bo_legacy_texture_object *tobj;
+    int                 validated;
+    int                 dirty;
+    void                *ptr;
+    struct bo_legacy    *next, *prev;
+    struct bo_legacy    *pnext, *pprev;
+};
+
+struct bo_manager_legacy {
+    struct radeon_bo_manager    base;
+    unsigned                    nhandle;
+    unsigned                    nfree_handles;
+    unsigned                    cfree_handles;
+    uint32_t                    current_age;
+    struct bo_legacy            bos;
+    struct bo_legacy            pending_bos;
+    uint32_t                    fb_location;
+    uint32_t                    texture_offset;
+    unsigned                    dma_alloc_size;
+    uint32_t                    dma_buf_count;
+    unsigned                    cpendings;
+    driTextureObject            texture_swapped;
+    driTexHeap                  *texture_heap;
+    struct radeon_screen        *screen;
+    unsigned                    *free_handles;
+};
+
+static void bo_legacy_tobj_destroy(void *data, driTextureObject *t)
+{
+    struct bo_legacy_texture_object *tobj = (struct bo_legacy_texture_object *)t;
+    
+    if (tobj->parent) {
+        tobj->parent->tobj = NULL;
+        tobj->parent->validated = 0;
+    }
+}
+
+static void inline clean_handles(struct bo_manager_legacy *bom)
+{
+  while (bom->cfree_handles > 0 &&
+	 !bom->free_handles[bom->cfree_handles - 1])
+    bom->cfree_handles--;
+
+}
+static int legacy_new_handle(struct bo_manager_legacy *bom, uint32_t *handle)
+{
+    uint32_t tmp;
+
+    *handle = 0;
+    if (bom->nhandle == 0xFFFFFFFF) {
+        return -EINVAL;
+    }
+    if (bom->cfree_handles > 0) {
+        tmp = bom->free_handles[--bom->cfree_handles];
+	clean_handles(bom);
+    } else {
+        bom->cfree_handles = 0;
+        tmp = bom->nhandle++;
+    }
+    assert(tmp);
+    *handle = tmp;
+    return 0;
+}
+
+static int legacy_free_handle(struct bo_manager_legacy *bom, uint32_t handle)
+{
+    uint32_t *handles;
+
+    if (!handle) {
+        return 0;
+    }
+    if (handle == (bom->nhandle - 1)) {
+        int i;
+
+        bom->nhandle--;
+        for (i = bom->cfree_handles - 1; i >= 0; i--) {
+            if (bom->free_handles[i] == (bom->nhandle - 1)) {
+                bom->nhandle--;
+                bom->free_handles[i] = 0;
+            }
+        }
+        clean_handles(bom);
+        return 0;
+    }
+    if (bom->cfree_handles < bom->nfree_handles) {
+        bom->free_handles[bom->cfree_handles++] = handle;
+        return 0;
+    }
+    bom->nfree_handles += 0x100;
+    handles = (uint32_t*)realloc(bom->free_handles, bom->nfree_handles * 4);
+    if (handles == NULL) {
+        bom->nfree_handles -= 0x100;
+        return -ENOMEM;
+    }
+    bom->free_handles = handles;
+    bom->free_handles[bom->cfree_handles++] = handle;
+    return 0;
+}
+
+static void legacy_get_current_age(struct bo_manager_legacy *boml)
+{
+    drm_radeon_getparam_t gp;
+    unsigned char *RADEONMMIO = NULL;
+    int r;
+
+    if (   IS_R300_CLASS(boml->screen) 
+        || IS_R600_CLASS(boml->screen) ) 
+    {
+    	gp.param = RADEON_PARAM_LAST_CLEAR;
+    	gp.value = (int *)&boml->current_age;
+    	r = drmCommandWriteRead(boml->base.fd, DRM_RADEON_GETPARAM,
+       	                     &gp, sizeof(gp));
+    	if (r) {
+       	 fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, r);
+         exit(1);
+       }
+    } 
+    else {
+        RADEONMMIO = boml->screen->mmio.map;
+        boml->current_age = boml->screen->scratch[3];
+        boml->current_age = INREG(RADEON_GUI_SCRATCH_REG3);
+    }
+}
+
+static int legacy_is_pending(struct radeon_bo *bo)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+
+    if (bo_legacy->is_pending <= 0) {
+        bo_legacy->is_pending = 0;
+        return 0;
+    }
+    if (boml->current_age >= bo_legacy->pending) {
+        if (boml->pending_bos.pprev == bo_legacy) {
+            boml->pending_bos.pprev = bo_legacy->pprev;
+        }
+        bo_legacy->pprev->pnext = bo_legacy->pnext;
+        if (bo_legacy->pnext) {
+            bo_legacy->pnext->pprev = bo_legacy->pprev;
+        }
+	assert(bo_legacy->is_pending <= bo->cref);
+        while (bo_legacy->is_pending--) {
+	    bo = radeon_bo_unref(bo);
+	    if (!bo)
+	      break;
+        }
+	if (bo)
+	  bo_legacy->is_pending = 0;
+        boml->cpendings--;
+        return 0;
+    }
+    return 1;
+}
+
+static int legacy_wait_pending(struct radeon_bo *bo)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+
+    if (!bo_legacy->is_pending) {
+        return 0;
+    }
+    /* FIXME: lockup and userspace busy looping that's all the folks */
+    legacy_get_current_age(boml);
+    while (legacy_is_pending(bo)) {
+        usleep(10);
+        legacy_get_current_age(boml);
+    }
+    return 0;
+}
+
+void legacy_track_pending(struct radeon_bo_manager *bom, int debug)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy*) bom;
+    struct bo_legacy *bo_legacy;
+    struct bo_legacy *next;
+
+    legacy_get_current_age(boml);
+    bo_legacy = boml->pending_bos.pnext;
+    while (bo_legacy) {
+        if (debug)
+            fprintf(stderr,"pending %p %d %d %d\n", bo_legacy, bo_legacy->base.size,
+                    boml->current_age, bo_legacy->pending);
+        next = bo_legacy->pnext;
+        if (legacy_is_pending(&(bo_legacy->base))) {
+        }
+        bo_legacy = next;
+    } 
+}
+
+static int legacy_wait_any_pending(struct bo_manager_legacy *boml)
+{
+    struct bo_legacy *bo_legacy;
+
+    legacy_get_current_age(boml);
+    bo_legacy = boml->pending_bos.pnext;
+    if (!bo_legacy)
+      return -1;
+    legacy_wait_pending(&bo_legacy->base);
+    return 0;
+}
+
+static void legacy_kick_all_buffers(struct bo_manager_legacy *boml)
+{
+    struct bo_legacy *legacy;
+
+    legacy = boml->bos.next;
+    while (legacy != &boml->bos) {
+	if (legacy->tobj) {
+	    if (legacy->validated) {
+		driDestroyTextureObject(&legacy->tobj->base);
+		legacy->tobj = 0;
+		legacy->validated = 0;
+	    }
+	}
+	legacy = legacy->next;
+    }
+}
+
+static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml,
+                                     uint32_t size,
+                                     uint32_t alignment,
+                                     uint32_t domains,
+                                     uint32_t flags)
+{
+    struct bo_legacy *bo_legacy;
+    static int pgsize;
+
+    if (pgsize == 0)
+        pgsize = getpagesize() - 1;
+
+    size = (size + pgsize) & ~pgsize;
+
+    bo_legacy = (struct bo_legacy*)calloc(1, sizeof(struct bo_legacy));
+    if (bo_legacy == NULL) {
+        return NULL;
+    }
+    bo_legacy->base.bom = (struct radeon_bo_manager*)boml;
+    bo_legacy->base.handle = 0;
+    bo_legacy->base.size = size;
+    bo_legacy->base.alignment = alignment;
+    bo_legacy->base.domains = domains;
+    bo_legacy->base.flags = flags;
+    bo_legacy->base.ptr = NULL;
+    bo_legacy->map_count = 0;
+    bo_legacy->next = NULL;
+    bo_legacy->prev = NULL;
+    bo_legacy->pnext = NULL;
+    bo_legacy->pprev = NULL;
+    bo_legacy->next = boml->bos.next;
+    bo_legacy->prev = &boml->bos;
+    boml->bos.next = bo_legacy;
+    if (bo_legacy->next) {
+        bo_legacy->next->prev = bo_legacy;
+    }
+
+    return bo_legacy;
+}
+
+static int bo_dma_alloc(struct radeon_bo *bo)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+    drm_radeon_mem_alloc_t alloc;
+    unsigned size;
+    int base_offset;
+    int r;
+
+    /* align size on 4Kb */
+    size = (((4 * 1024) - 1) + bo->size) & ~((4 * 1024) - 1);
+    alloc.region = RADEON_MEM_REGION_GART;
+    alloc.alignment = bo_legacy->base.alignment;
+    alloc.size = size;
+    alloc.region_offset = &base_offset;
+    r = drmCommandWriteRead(bo->bom->fd,
+                            DRM_RADEON_ALLOC,
+                            &alloc,
+                            sizeof(alloc));
+    if (r) {
+        /* ptr is set to NULL if dma allocation failed */
+        bo_legacy->ptr = NULL;
+        return r;
+    }
+    bo_legacy->ptr = boml->screen->gartTextures.map + base_offset;
+    bo_legacy->offset = boml->screen->gart_texture_offset + base_offset;
+    bo->size = size;
+    boml->dma_alloc_size += size;
+    boml->dma_buf_count++;
+    return 0;
+}
+
+static int bo_dma_free(struct radeon_bo *bo)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+    drm_radeon_mem_free_t memfree;
+    int r;
+
+    if (bo_legacy->ptr == NULL) {
+        /* ptr is set to NULL if dma allocation failed */
+        return 0;
+    }
+    legacy_get_current_age(boml);
+    memfree.region = RADEON_MEM_REGION_GART;
+    memfree.region_offset  = bo_legacy->offset;
+    memfree.region_offset -= boml->screen->gart_texture_offset;
+    r = drmCommandWrite(boml->base.fd,
+                        DRM_RADEON_FREE,
+                        &memfree,
+                        sizeof(memfree));
+    if (r) {
+        fprintf(stderr, "Failed to free bo[%p] at %08x\n",
+                &bo_legacy->base, memfree.region_offset);
+        fprintf(stderr, "ret = %s\n", strerror(-r));
+        return r;
+    }
+    boml->dma_alloc_size -= bo_legacy->base.size;
+    boml->dma_buf_count--;
+    return 0;
+}
+
+static void bo_free(struct bo_legacy *bo_legacy)
+{
+    struct bo_manager_legacy *boml;
+
+    if (bo_legacy == NULL) {
+        return;
+    }
+    boml = (struct bo_manager_legacy *)bo_legacy->base.bom;
+    bo_legacy->prev->next = bo_legacy->next;
+    if (bo_legacy->next) {
+        bo_legacy->next->prev = bo_legacy->prev;
+    }
+    if (!bo_legacy->static_bo) {
+        legacy_free_handle(boml, bo_legacy->base.handle);
+        if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) {
+            /* dma buffers */
+            bo_dma_free(&bo_legacy->base);
+        } else {
+  	    driDestroyTextureObject(&bo_legacy->tobj->base);
+	    bo_legacy->tobj = NULL;
+            /* free backing store */
+            free(bo_legacy->ptr);
+        }
+    }
+    memset(bo_legacy, 0 , sizeof(struct bo_legacy));
+    free(bo_legacy);
+}
+
+static struct radeon_bo *bo_open(struct radeon_bo_manager *bom,
+                                 uint32_t handle,
+                                 uint32_t size,
+                                 uint32_t alignment,
+                                 uint32_t domains,
+                                 uint32_t flags)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
+    struct bo_legacy *bo_legacy;
+    int r;
+
+    if (handle) {
+        bo_legacy = boml->bos.next;
+        while (bo_legacy) {
+            if (bo_legacy->base.handle == handle) {
+                radeon_bo_ref(&(bo_legacy->base));
+                return (struct radeon_bo*)bo_legacy;
+            }
+            bo_legacy = bo_legacy->next;
+        }
+        return NULL;
+    }
+    bo_legacy = bo_allocate(boml, size, alignment, domains, flags);
+    bo_legacy->static_bo = 0;
+    r = legacy_new_handle(boml, &bo_legacy->base.handle);
+    if (r) {
+        bo_free(bo_legacy);
+        return NULL;
+    }
+    if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) 
+    {
+retry:
+        legacy_track_pending(&boml->base, 0);
+        /* dma buffers */
+
+        r = bo_dma_alloc(&(bo_legacy->base));
+        if (r) 
+        {
+	         if (legacy_wait_any_pending(boml) == -1) 
+             {
+                  bo_free(bo_legacy);
+	              return NULL;
+             }
+	         goto retry;
+	         return NULL;
+        }
+    } 
+    else 
+    {
+        bo_legacy->ptr = malloc(bo_legacy->base.size);
+        if (bo_legacy->ptr == NULL) {
+            bo_free(bo_legacy);
+            return NULL;
+        }
+    }
+    radeon_bo_ref(&(bo_legacy->base));
+
+    return (struct radeon_bo*)bo_legacy;
+}
+
+static void bo_ref(struct radeon_bo *bo)
+{
+}
+
+static struct radeon_bo *bo_unref(struct radeon_bo *bo)
+{
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+
+    if (bo->cref <= 0) {
+        bo_legacy->prev->next = bo_legacy->next;
+        if (bo_legacy->next) {
+            bo_legacy->next->prev = bo_legacy->prev;
+        }
+        if (!bo_legacy->is_pending) {
+            bo_free(bo_legacy);
+        }
+        return NULL;
+    }
+    return bo;
+}
+
+static int bo_map(struct radeon_bo *bo, int write)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+
+    legacy_wait_pending(bo);
+    bo_legacy->validated = 0;
+    bo_legacy->dirty = 1;
+    bo_legacy->map_count++;
+    bo->ptr = bo_legacy->ptr;
+    /* Read the first pixel in the frame buffer.  This should
+     * be a noop, right?  In fact without this conform fails as reading
+     * from the framebuffer sometimes produces old results -- the
+     * on-card read cache gets mixed up and doesn't notice that the
+     * framebuffer has been updated.
+     *
+     * Note that we should probably be reading some otherwise unused
+     * region of VRAM, otherwise we might get incorrect results when
+     * reading pixels from the top left of the screen.
+     *
+     * I found this problem on an R420 with glean's texCube test.
+     * Note that the R200 span code also *writes* the first pixel in the
+     * framebuffer, but I've found this to be unnecessary.
+     *  -- Nicolai Hähnle, June 2008
+     */
+    if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) {
+        int p;
+        volatile int *buf = (int*)boml->screen->driScreen->pFB;
+        p = *buf;
+    }
+
+    return 0;
+}
+
+static int bo_unmap(struct radeon_bo *bo)
+{
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+
+    if (--bo_legacy->map_count > 0) 
+    {
+        return 0;
+    }
+    
+    bo->ptr = NULL;
+
+    return 0;
+}
+
+static int bo_is_busy(struct radeon_bo *bo, uint32_t *domain)
+{
+    *domain = 0;
+    if (bo->domains & RADEON_GEM_DOMAIN_GTT)
+        *domain = RADEON_GEM_DOMAIN_GTT;
+    else
+        *domain = RADEON_GEM_DOMAIN_CPU;
+    if (legacy_is_pending(bo))
+        return -EBUSY;
+    else
+        return 0;
+}
+
+static int bo_is_static(struct radeon_bo *bo)
+{
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+    return bo_legacy->static_bo;
+}
+
+static struct radeon_bo_funcs bo_legacy_funcs = {
+    bo_open,
+    bo_ref,
+    bo_unref,
+    bo_map,
+    bo_unmap,
+    NULL,
+    bo_is_static,
+    NULL,
+    NULL,
+    bo_is_busy
+};
+
+static int bo_vram_validate(struct radeon_bo *bo,
+                            uint32_t *soffset,
+                            uint32_t *eoffset)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+    int r;
+    int retry_count = 0, pending_retry = 0;
+    
+    if (!bo_legacy->tobj) {
+	bo_legacy->tobj = CALLOC(sizeof(struct bo_legacy_texture_object));
+	bo_legacy->tobj->parent = bo_legacy;
+	make_empty_list(&bo_legacy->tobj->base);
+	bo_legacy->tobj->base.totalSize = bo->size;
+    retry:
+        r = driAllocateTexture(&boml->texture_heap, 1,
+                               &bo_legacy->tobj->base);
+        if (r) {
+		pending_retry = 0;
+		while(boml->cpendings && pending_retry++ < 10000) {
+			legacy_track_pending(&boml->base, 0);
+			retry_count++;
+			if (retry_count > 2) {
+				free(bo_legacy->tobj);
+				bo_legacy->tobj = NULL;
+				fprintf(stderr, "Ouch! vram_validate failed %d\n", r);
+				return -1;
+			}
+			goto retry;
+		}
+	}
+        bo_legacy->offset = boml->texture_offset +
+                            bo_legacy->tobj->base.memBlock->ofs;
+        bo_legacy->dirty = 1;
+    }
+
+    assert(bo_legacy->tobj->base.memBlock);
+
+    if (bo_legacy->tobj)
+	driUpdateTextureLRU(&bo_legacy->tobj->base);
+
+    if (bo_legacy->dirty || bo_legacy->tobj->base.dirty_images[0]) {
+	    if (IS_R600_CLASS(boml->screen)) {
+		    drm_radeon_texture_t tex;
+		    drm_radeon_tex_image_t tmp;
+		    int ret;
+
+		    tex.offset = bo_legacy->offset;
+		    tex.image = &tmp;
+		    assert(!(tex.offset & 1023));
+
+		    tmp.x = 0;
+		    tmp.y = 0;
+		    tmp.width = bo->size;
+		    tmp.height = 1;
+		    tmp.data = bo_legacy->ptr;
+		    tex.format = RADEON_TXFORMAT_ARGB8888;
+		    tex.width = tmp.width;
+		    tex.height = tmp.height;
+		    tex.pitch = bo->size;
+		    do {
+			    ret = drmCommandWriteRead(bo->bom->fd,
+						      DRM_RADEON_TEXTURE,
+						      &tex,
+						      sizeof(drm_radeon_texture_t));
+			    if (ret) {
+				    if (RADEON_DEBUG & RADEON_IOCTL)
+					    fprintf(stderr, "DRM_RADEON_TEXTURE:  again!\n");
+				    usleep(1);
+			    }
+		    } while (ret == -EAGAIN);
+	    } else {
+		    /* Copy to VRAM using a blit.
+		     * All memory is 4K aligned. We're using 1024 pixels wide blits.
+		     */
+		    drm_radeon_texture_t tex;
+		    drm_radeon_tex_image_t tmp;
+		    int ret;
+
+		    tex.offset = bo_legacy->offset;
+		    tex.image = &tmp;
+		    assert(!(tex.offset & 1023));
+
+		    tmp.x = 0;
+		    tmp.y = 0;
+		    if (bo->size < 4096) {
+			    tmp.width = (bo->size + 3) / 4;
+			    tmp.height = 1;
+		    } else {
+			    tmp.width = 1024;
+			    tmp.height = (bo->size + 4095) / 4096;
+		    }
+		    tmp.data = bo_legacy->ptr;
+		    tex.format = RADEON_TXFORMAT_ARGB8888;
+		    tex.width = tmp.width;
+		    tex.height = tmp.height;
+		    tex.pitch = MAX2(tmp.width / 16, 1);
+		    do {
+			    ret = drmCommandWriteRead(bo->bom->fd,
+						      DRM_RADEON_TEXTURE,
+						      &tex,
+						      sizeof(drm_radeon_texture_t));
+			    if (ret) {
+				    if (RADEON_DEBUG & RADEON_IOCTL)
+					    fprintf(stderr, "DRM_RADEON_TEXTURE:  again!\n");
+				    usleep(1);
+			    }
+		    } while (ret == -EAGAIN);
+	    }
+	    bo_legacy->dirty = 0;
+	    bo_legacy->tobj->base.dirty_images[0] = 0;
+    }
+    return 0;
+}
+
+/* 
+ *  radeon_bo_legacy_validate -
+ *  returns:
+ *  0 - all good
+ *  -EINVAL - mapped buffer can't be validated
+ *  -EAGAIN - restart validation we've kicked all the buffers out
+ */
+int radeon_bo_legacy_validate(struct radeon_bo *bo,
+                              uint32_t *soffset,
+                              uint32_t *eoffset)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+    int r;
+    int retries = 0;
+
+    if (bo_legacy->map_count) {
+        fprintf(stderr, "bo(%p, %d) is mapped (%d) can't valide it.\n",
+                bo, bo->size, bo_legacy->map_count);
+        return -EINVAL;
+    }
+    if (bo_legacy->static_bo || bo_legacy->validated) {
+        *soffset = bo_legacy->offset;
+        *eoffset = bo_legacy->offset + bo->size;
+
+        return 0;
+    }
+    if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) {
+
+        r = bo_vram_validate(bo, soffset, eoffset);
+        if (r) {
+	    legacy_track_pending(&boml->base, 0);
+	    legacy_kick_all_buffers(boml);
+	    retries++;
+	    if (retries == 2) {
+		fprintf(stderr,"legacy bo: failed to get relocations into aperture\n");
+		assert(0);
+		exit(-1);
+	    }
+	    return -EAGAIN;
+        }
+    }
+    *soffset = bo_legacy->offset;
+    *eoffset = bo_legacy->offset + bo->size;
+    bo_legacy->validated = 1;
+
+    return 0;
+}
+
+void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+
+    bo_legacy->pending = pending;
+    bo_legacy->is_pending++;
+    /* add to pending list */
+    radeon_bo_ref(bo);
+    if (bo_legacy->is_pending > 1) {
+        return;    
+    }
+    bo_legacy->pprev = boml->pending_bos.pprev;
+    bo_legacy->pnext = NULL;
+    bo_legacy->pprev->pnext = bo_legacy;
+    boml->pending_bos.pprev = bo_legacy;
+    boml->cpendings++;
+}
+
+void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
+    struct bo_legacy *bo_legacy;
+
+    if (bom == NULL) {
+        return;
+    }
+    bo_legacy = boml->bos.next;
+    while (bo_legacy) {
+        struct bo_legacy *next;
+
+        next = bo_legacy->next;
+        bo_free(bo_legacy);
+        bo_legacy = next;
+    }
+    driDestroyTextureHeap(boml->texture_heap);
+    free(boml->free_handles);
+    free(boml);
+}
+
+static struct bo_legacy *radeon_legacy_bo_alloc_static(struct bo_manager_legacy *bom,
+						       int size,
+						       uint32_t offset)
+{
+    struct bo_legacy *bo;
+
+    bo = bo_allocate(bom, size, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+    if (bo == NULL)
+	return NULL;
+    bo->static_bo = 1;
+    bo->offset = offset + bom->fb_location;
+    bo->base.handle = bo->offset;
+    bo->ptr = bom->screen->driScreen->pFB + offset;
+    if (bo->base.handle > bom->nhandle) {
+        bom->nhandle = bo->base.handle + 1;
+    }
+    radeon_bo_ref(&(bo->base));
+    return bo;
+}
+
+struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *scrn)
+{
+    struct bo_manager_legacy *bom;
+    struct bo_legacy *bo;
+    unsigned size;
+
+    bom = (struct bo_manager_legacy*)
+          calloc(1, sizeof(struct bo_manager_legacy));
+    if (bom == NULL) {
+        return NULL;
+    }
+
+    make_empty_list(&bom->texture_swapped);
+
+    bom->texture_heap = driCreateTextureHeap(0,
+                                             bom,
+                                             scrn->texSize[0],
+                                             12,
+                                             RADEON_NR_TEX_REGIONS,
+                                             (drmTextureRegionPtr)scrn->sarea->tex_list[0],
+                                             &scrn->sarea->tex_age[0],
+                                             &bom->texture_swapped,
+                                             sizeof(struct bo_legacy_texture_object),
+                                             &bo_legacy_tobj_destroy);
+    bom->texture_offset = scrn->texOffset[0];
+
+    bom->base.funcs = &bo_legacy_funcs;
+    bom->base.fd = scrn->driScreen->fd;
+    bom->bos.next = NULL;
+    bom->bos.prev = NULL;
+    bom->pending_bos.pprev = &bom->pending_bos;
+    bom->pending_bos.pnext = NULL;
+    bom->screen = scrn;
+    bom->fb_location = scrn->fbLocation;
+    bom->nhandle = 1;
+    bom->cfree_handles = 0;
+    bom->nfree_handles = 0x400;
+    bom->free_handles = (uint32_t*)malloc(bom->nfree_handles * 4);
+    if (bom->free_handles == NULL) {
+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
+        return NULL;
+    }
+
+    /* biggest framebuffer size */
+    size = 4096*4096*4; 
+
+    /* allocate front */
+    bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->frontOffset);
+
+    if (!bo) {
+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
+        return NULL;
+    }
+    if (scrn->sarea->tiling_enabled) {
+        bo->base.flags = RADEON_BO_FLAGS_MACRO_TILE;
+    }
+
+    /* allocate back */
+    bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->backOffset);
+
+    if (!bo) {
+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
+        return NULL;
+    }
+    if (scrn->sarea->tiling_enabled) {
+        bo->base.flags = RADEON_BO_FLAGS_MACRO_TILE;
+    }
+
+    /* allocate depth */
+    bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->depthOffset);
+
+    if (!bo) {
+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
+        return NULL;
+    }
+    bo->base.flags = 0;
+    if (scrn->sarea->tiling_enabled) {
+        bo->base.flags |= RADEON_BO_FLAGS_MACRO_TILE;
+        bo->base.flags |= RADEON_BO_FLAGS_MICRO_TILE;
+    }
+    return (struct radeon_bo_manager*)bom;
+}
+
+void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
+    DRI_AGE_TEXTURES(boml->texture_heap);
+}
+
+unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo)
+{
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+
+    if (bo_legacy->static_bo || (bo->domains & RADEON_GEM_DOMAIN_GTT)) {
+        return 0;
+    }
+    return bo->size;
+}
+
+/*
+ * Fake up a bo for things like texture image_override.
+ * bo->offset already includes fb_location
+ */
+struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom,
+					      int size,
+	                                      uint32_t offset)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
+    struct bo_legacy *bo;
+
+    bo = bo_allocate(boml, size, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+    if (bo == NULL)
+	return NULL;
+    bo->static_bo = 1;
+    bo->offset = offset;
+    bo->base.handle = bo->offset;
+    bo->ptr = boml->screen->driScreen->pFB + (offset - boml->fb_location);
+    if (bo->base.handle > boml->nhandle) {
+        boml->nhandle = bo->base.handle + 1;
+    }
+    radeon_bo_ref(&(bo->base));
+    return &(bo->base);
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h
new file mode 100644
index 00000000000..2cf15dfaff3
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h
@@ -0,0 +1,50 @@
+/* 
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Aapo Tahkola <aet@rasterburn.org>
+ *      Nicolai Haehnle <prefect_@gmx.net>
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_BO_LEGACY_H
+#define RADEON_BO_LEGACY_H
+
+#include "radeon_screen.h"
+
+void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending);
+int radeon_bo_legacy_validate(struct radeon_bo *bo,
+                              uint32_t *soffset,
+                              uint32_t *eoffset);
+struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *scrn);
+void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom);
+void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom);
+unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo);
+struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom,
+					      int size,
+	                                      uint32_t offset);
+void legacy_track_pending(struct radeon_bo_manager *bom, int debug);
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
new file mode 100644
index 00000000000..4520a7d7d49
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
@@ -0,0 +1,99 @@
+#ifndef RADEON_CS_WRAPPER_H
+#define RADEON_CS_WRAPPER_H
+
+#ifdef HAVE_LIBDRM_RADEON
+
+#include "radeon_bo.h"
+#include "radeon_bo_gem.h"
+#include "radeon_cs.h"
+#include "radeon_cs_gem.h"
+
+#else
+#include <stdint.h>
+
+#define RADEON_GEM_DOMAIN_CPU 0x1   // Cached CPU domain
+#define RADEON_GEM_DOMAIN_GTT 0x2   // GTT or cache flushed
+#define RADEON_GEM_DOMAIN_VRAM 0x4  // VRAM domain
+
+#define RADEON_TILING_MACRO 0x1
+#define RADEON_TILING_MICRO 0x2
+#define RADEON_TILING_SWAP 0x4
+#define RADEON_TILING_SURFACE 0x8 /* this object requires a surface
+				   * when mapped - i.e. front buffer */
+
+/* to be used to build locally in mesa with no libdrm bits */
+#include "../radeon/radeon_bo_drm.h"
+#include "../radeon/radeon_cs_drm.h"
+
+#ifndef DRM_RADEON_GEM_INFO
+#define DRM_RADEON_GEM_INFO 0x1c
+
+struct drm_radeon_gem_info {
+        uint64_t gart_size;
+        uint64_t vram_size;
+        uint64_t vram_visible;
+};
+
+struct drm_radeon_info {
+	uint32_t request;
+	uint32_t pad;
+	uint32_t value;
+};
+#endif
+
+#ifndef RADEON_PARAM_DEVICE_ID
+#define RADEON_PARAM_DEVICE_ID 16
+#endif
+
+#ifndef RADEON_PARAM_NUM_Z_PIPES
+#define RADEON_PARAM_NUM_Z_PIPES 17
+#endif
+
+#ifndef RADEON_INFO_DEVICE_ID
+#define RADEON_INFO_DEVICE_ID 0
+#endif
+#ifndef RADEON_INFO_NUM_GB_PIPES
+#define RADEON_INFO_NUM_GB_PIPES 0
+#endif
+
+#ifndef RADEON_INFO_NUM_Z_PIPES
+#define RADEON_INFO_NUM_Z_PIPES 0
+#endif
+
+#ifndef DRM_RADEON_INFO
+#define DRM_RADEON_INFO 0x1
+#endif
+
+
+static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy)
+{
+  return 0;
+}
+
+static inline void *radeon_bo_manager_gem_ctor(int fd)
+{
+  return NULL;
+}
+
+static inline void radeon_bo_manager_gem_dtor(void *dummy)
+{
+}
+
+static inline void *radeon_cs_manager_gem_ctor(int fd)
+{
+  return NULL;
+}
+
+static inline void radeon_cs_manager_gem_dtor(void *dummy)
+{
+}
+
+static inline void radeon_tracker_print(void *ptr, int io)
+{
+}
+#endif
+
+#include "radeon_bo_legacy.h"
+#include "radeon_cs_legacy.h"
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
new file mode 100644
index 00000000000..a24b6dac265
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_buffer_objects.h"
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/bufferobj.h"
+
+#include "radeon_common.h"
+
+struct radeon_buffer_object *
+get_radeon_buffer_object(struct gl_buffer_object *obj)
+{
+    return (struct radeon_buffer_object *) obj;
+}
+
+static struct gl_buffer_object *
+radeonNewBufferObject(GLcontext * ctx,
+                      GLuint name,
+                      GLenum target)
+{
+    struct radeon_buffer_object *obj = CALLOC_STRUCT(radeon_buffer_object);
+
+    _mesa_initialize_buffer_object(&obj->Base, name, target);
+
+    obj->bo = NULL;
+
+    return &obj->Base;
+}
+
+/**
+ * Called via glDeleteBuffersARB().
+ */
+static void
+radeonDeleteBufferObject(GLcontext * ctx,
+                         struct gl_buffer_object *obj)
+{
+    struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+
+    if (obj->Pointer) {
+        radeon_bo_unmap(radeon_obj->bo);
+    }
+
+    if (radeon_obj->bo) {
+        radeon_bo_unref(radeon_obj->bo);
+    }
+
+    _mesa_free(radeon_obj);
+}
+
+
+/**
+ * Allocate space for and store data in a buffer object.  Any data that was
+ * previously stored in the buffer object is lost.  If data is NULL,
+ * memory will be allocated, but no copy will occur.
+ * Called via ctx->Driver.BufferData().
+ * \return GL_TRUE for success, GL_FALSE if out of memory
+ */
+static GLboolean
+radeonBufferData(GLcontext * ctx,
+                 GLenum target,
+                 GLsizeiptrARB size,
+                 const GLvoid * data,
+                 GLenum usage,
+                 struct gl_buffer_object *obj)
+{
+    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+    struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+
+    radeon_obj->Base.Size = size;
+    radeon_obj->Base.Usage = usage;
+
+    if (radeon_obj->bo != NULL) {
+        radeon_bo_unref(radeon_obj->bo);
+        radeon_obj->bo = NULL;
+    }
+
+    if (size != 0) {
+        radeon_obj->bo = radeon_bo_open(radeon->radeonScreen->bom,
+                                        0,
+                                        size,
+                                        32,
+                                        RADEON_GEM_DOMAIN_GTT,
+                                        0);
+
+        if (!radeon_obj->bo)
+            return GL_FALSE;
+
+        if (data != NULL) {
+            radeon_bo_map(radeon_obj->bo, GL_TRUE);
+
+            _mesa_memcpy(radeon_obj->bo->ptr, data, size);
+
+            radeon_bo_unmap(radeon_obj->bo);
+        }
+    }
+    return GL_TRUE;
+}
+
+/**
+ * Replace data in a subrange of buffer object.  If the data range
+ * specified by size + offset extends beyond the end of the buffer or
+ * if data is NULL, no copy is performed.
+ * Called via glBufferSubDataARB().
+ */
+static void
+radeonBufferSubData(GLcontext * ctx,
+                    GLenum target,
+                    GLintptrARB offset,
+                    GLsizeiptrARB size,
+                    const GLvoid * data,
+                    struct gl_buffer_object *obj)
+{
+    struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+
+    radeon_bo_map(radeon_obj->bo, GL_TRUE);
+
+    _mesa_memcpy(radeon_obj->bo->ptr + offset, data, size);
+
+    radeon_bo_unmap(radeon_obj->bo);
+}
+
+/**
+ * Called via glGetBufferSubDataARB()
+ */
+static void
+radeonGetBufferSubData(GLcontext * ctx,
+                       GLenum target,
+                       GLintptrARB offset,
+                       GLsizeiptrARB size,
+                       GLvoid * data,
+                       struct gl_buffer_object *obj)
+{
+    struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+
+    radeon_bo_map(radeon_obj->bo, GL_FALSE);
+
+    _mesa_memcpy(data, radeon_obj->bo->ptr + offset, size);
+
+    radeon_bo_unmap(radeon_obj->bo);
+}
+
+/**
+ * Called via glMapBufferARB()
+ */
+static void *
+radeonMapBuffer(GLcontext * ctx,
+                GLenum target,
+                GLenum access,
+                struct gl_buffer_object *obj)
+{
+    struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+
+    if (access == GL_WRITE_ONLY_ARB) {
+        ctx->Driver.Flush(ctx);
+    }
+
+    if (radeon_obj->bo == NULL) {
+        obj->Pointer = NULL;
+        return NULL;
+    }
+
+    radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB);
+
+    return obj->Pointer = radeon_obj->bo->ptr;
+}
+
+
+/**
+ * Called via glUnmapBufferARB()
+ */
+static GLboolean
+radeonUnmapBuffer(GLcontext * ctx,
+                  GLenum target,
+                  struct gl_buffer_object *obj)
+{
+    struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+
+    if (radeon_obj->bo != NULL) {
+        radeon_bo_unmap(radeon_obj->bo);
+        obj->Pointer = NULL;
+    }
+
+    return GL_TRUE;
+}
+
+void
+radeonInitBufferObjectFuncs(struct dd_function_table *functions)
+{
+    functions->NewBufferObject = radeonNewBufferObject;
+    functions->DeleteBuffer = radeonDeleteBufferObject;
+    functions->BufferData = radeonBufferData;
+    functions->BufferSubData = radeonBufferSubData;
+    functions->GetBufferSubData = radeonGetBufferSubData;
+    functions->MapBuffer = radeonMapBuffer;
+    functions->UnmapBuffer = radeonUnmapBuffer;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.h b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.h
new file mode 100644
index 00000000000..d681960825a
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_BUFFER_OBJECTS_H
+#define RADEON_BUFFER_OBJECTS_H
+
+#include "main/mtypes.h"
+
+struct radeon_bo;
+
+/**
+ * Radeon vertex/pixel buffer object, derived from Mesa's gl_buffer_object.
+ */
+struct radeon_buffer_object
+{
+   struct gl_buffer_object Base;
+   struct radeon_bo *bo;
+};
+
+struct radeon_buffer_object *
+get_radeon_buffer_object(struct gl_buffer_object *obj);
+
+/**
+ * Hook the bufferobject implementation into mesa:
+ */
+void radeonInitBufferObjectFuncs(struct dd_function_table *functions);
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h
index f6bd1eb83fb..46a9cd5ff84 100644
--- a/src/mesa/drivers/dri/radeon/radeon_chipset.h
+++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h
@@ -255,6 +255,145 @@
 #define PCI_CHIP_RS740_796E             0x796E
 #define PCI_CHIP_RS740_796F             0x796F
 
+#define PCI_CHIP_R600_9400              0x9400
+#define PCI_CHIP_R600_9401              0x9401
+#define PCI_CHIP_R600_9402              0x9402
+#define PCI_CHIP_R600_9403              0x9403
+#define PCI_CHIP_R600_9405              0x9405
+#define PCI_CHIP_R600_940A              0x940A
+#define PCI_CHIP_R600_940B              0x940B
+#define PCI_CHIP_R600_940F              0x940F
+
+#define PCI_CHIP_RV610_94C0             0x94C0
+#define PCI_CHIP_RV610_94C1             0x94C1
+#define PCI_CHIP_RV610_94C3             0x94C3
+#define PCI_CHIP_RV610_94C4             0x94C4
+#define PCI_CHIP_RV610_94C5             0x94C5
+#define PCI_CHIP_RV610_94C6             0x94C6
+#define PCI_CHIP_RV610_94C7             0x94C7
+#define PCI_CHIP_RV610_94C8             0x94C8
+#define PCI_CHIP_RV610_94C9             0x94C9
+#define PCI_CHIP_RV610_94CB             0x94CB
+#define PCI_CHIP_RV610_94CC             0x94CC
+#define PCI_CHIP_RV610_94CD             0x94CD
+
+#define PCI_CHIP_RV630_9580             0x9580
+#define PCI_CHIP_RV630_9581             0x9581
+#define PCI_CHIP_RV630_9583             0x9583
+#define PCI_CHIP_RV630_9586             0x9586
+#define PCI_CHIP_RV630_9587             0x9587
+#define PCI_CHIP_RV630_9588             0x9588
+#define PCI_CHIP_RV630_9589             0x9589
+#define PCI_CHIP_RV630_958A             0x958A
+#define PCI_CHIP_RV630_958B             0x958B
+#define PCI_CHIP_RV630_958C             0x958C
+#define PCI_CHIP_RV630_958D             0x958D
+#define PCI_CHIP_RV630_958E             0x958E
+#define PCI_CHIP_RV630_958F             0x958F
+
+#define PCI_CHIP_RV670_9500             0x9500
+#define PCI_CHIP_RV670_9501             0x9501
+#define PCI_CHIP_RV670_9504             0x9504
+#define PCI_CHIP_RV670_9505             0x9505
+#define PCI_CHIP_RV670_9506             0x9506
+#define PCI_CHIP_RV670_9507             0x9507
+#define PCI_CHIP_RV670_9508             0x9508
+#define PCI_CHIP_RV670_9509             0x9509
+#define PCI_CHIP_RV670_950F             0x950F
+#define PCI_CHIP_RV670_9511             0x9511
+#define PCI_CHIP_RV670_9515             0x9515
+#define PCI_CHIP_RV670_9517             0x9517
+#define PCI_CHIP_RV670_9519             0x9519
+
+#define PCI_CHIP_RV620_95C0             0x95C0
+#define PCI_CHIP_RV620_95C2             0x95C2
+#define PCI_CHIP_RV620_95C4             0x95C4
+#define PCI_CHIP_RV620_95C5             0x95C5
+#define PCI_CHIP_RV620_95C6             0x95C6
+#define PCI_CHIP_RV620_95C7             0x95C7
+#define PCI_CHIP_RV620_95C9             0x95C9
+#define PCI_CHIP_RV620_95CC             0x95CC
+#define PCI_CHIP_RV620_95CD             0x95CD
+#define PCI_CHIP_RV620_95CE             0x95CE
+#define PCI_CHIP_RV620_95CF             0x95CF
+
+#define PCI_CHIP_RV635_9590             0x9590
+#define PCI_CHIP_RV635_9591             0x9591
+#define PCI_CHIP_RV635_9593             0x9593
+#define PCI_CHIP_RV635_9595             0x9595
+#define PCI_CHIP_RV635_9596             0x9596
+#define PCI_CHIP_RV635_9597             0x9597
+#define PCI_CHIP_RV635_9598             0x9598
+#define PCI_CHIP_RV635_9599             0x9599
+#define PCI_CHIP_RV635_959B             0x959B
+
+#define PCI_CHIP_RS780_9610             0x9610
+#define PCI_CHIP_RS780_9611             0x9611
+#define PCI_CHIP_RS780_9612             0x9612
+#define PCI_CHIP_RS780_9613             0x9613
+#define PCI_CHIP_RS780_9614             0x9614
+#define PCI_CHIP_RS780_9615             0x9615
+#define PCI_CHIP_RS780_9616             0x9616
+
+#define PCI_CHIP_RS880_9710             0x9710
+#define PCI_CHIP_RS880_9711             0x9711
+#define PCI_CHIP_RS880_9712             0x9712
+#define PCI_CHIP_RS880_9713             0x9713
+#define PCI_CHIP_RS880_9714             0x9714
+
+#define PCI_CHIP_RV770_9440             0x9440
+#define PCI_CHIP_RV770_9441             0x9441
+#define PCI_CHIP_RV770_9442             0x9442
+#define PCI_CHIP_RV770_9443             0x9443
+#define PCI_CHIP_RV770_9444             0x9444
+#define PCI_CHIP_RV770_9446             0x9446
+#define PCI_CHIP_RV770_944A             0x944A
+#define PCI_CHIP_RV770_944B             0x944B
+#define PCI_CHIP_RV770_944C             0x944C
+#define PCI_CHIP_RV770_944E             0x944E
+#define PCI_CHIP_RV770_9450             0x9450
+#define PCI_CHIP_RV770_9452             0x9452
+#define PCI_CHIP_RV770_9456             0x9456
+#define PCI_CHIP_RV770_945A             0x945A
+#define PCI_CHIP_RV770_945B             0x945B
+#define PCI_CHIP_RV790_9460             0x9460
+#define PCI_CHIP_RV790_9462             0x9462
+#define PCI_CHIP_RV770_946A             0x946A
+#define PCI_CHIP_RV770_946B             0x946B
+#define PCI_CHIP_RV770_947A             0x947A
+#define PCI_CHIP_RV770_947B             0x947B
+
+#define PCI_CHIP_RV730_9480             0x9480
+#define PCI_CHIP_RV730_9487             0x9487
+#define PCI_CHIP_RV730_9488             0x9488
+#define PCI_CHIP_RV730_9489             0x9489
+#define PCI_CHIP_RV730_948F             0x948F
+#define PCI_CHIP_RV730_9490             0x9490
+#define PCI_CHIP_RV730_9491             0x9491
+#define PCI_CHIP_RV730_9495             0x9495
+#define PCI_CHIP_RV730_9498             0x9498
+#define PCI_CHIP_RV730_949C             0x949C
+#define PCI_CHIP_RV730_949E             0x949E
+#define PCI_CHIP_RV730_949F             0x949F
+
+#define PCI_CHIP_RV710_9540             0x9540
+#define PCI_CHIP_RV710_9541             0x9541
+#define PCI_CHIP_RV710_9542             0x9542
+#define PCI_CHIP_RV710_954E             0x954E
+#define PCI_CHIP_RV710_954F             0x954F
+#define PCI_CHIP_RV710_9552             0x9552
+#define PCI_CHIP_RV710_9553             0x9553
+#define PCI_CHIP_RV710_9555             0x9555
+#define PCI_CHIP_RV710_9557             0x9557
+
+#define PCI_CHIP_RV740_94A0             0x94A0
+#define PCI_CHIP_RV740_94A1             0x94A1
+#define PCI_CHIP_RV740_94A3             0x94A3
+#define PCI_CHIP_RV740_94B1             0x94B1
+#define PCI_CHIP_RV740_94B3             0x94B3
+#define PCI_CHIP_RV740_94B4             0x94B4
+#define PCI_CHIP_RV740_94B5             0x94B5
+#define PCI_CHIP_RV740_94B9             0x94B9
 
 enum {
    CHIP_FAMILY_R100,
@@ -282,6 +421,18 @@ enum {
    CHIP_FAMILY_R580,
    CHIP_FAMILY_RV560,
    CHIP_FAMILY_RV570,
+   CHIP_FAMILY_R600,
+   CHIP_FAMILY_RV610,
+   CHIP_FAMILY_RV630,
+   CHIP_FAMILY_RV670,
+   CHIP_FAMILY_RV620,
+   CHIP_FAMILY_RV635,
+   CHIP_FAMILY_RS780,
+   CHIP_FAMILY_RS880,
+   CHIP_FAMILY_RV770,
+   CHIP_FAMILY_RV730,
+   CHIP_FAMILY_RV710,
+   CHIP_FAMILY_RV740,
    CHIP_FAMILY_LAST
 };
 
@@ -289,6 +440,7 @@ enum {
 #define RADEON_CLASS_R100		(0 << 0)
 #define RADEON_CLASS_R200		(1 << 0)
 #define RADEON_CLASS_R300		(2 << 0)
+#define RADEON_CLASS_R600		(3 << 0)
 #define RADEON_CLASS_MASK		(3 << 0)
 
 #define RADEON_CHIPSET_TCL		(1 << 2)	/* tcl support - any radeon */
diff --git a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
new file mode 100644
index 00000000000..6fcd1ce7ca6
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
@@ -0,0 +1,121 @@
+#ifndef COMMON_CMDBUF_H
+#define COMMON_CMDBUF_H
+
+#include "radeon_bocs_wrapper.h"
+
+GLboolean rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller);
+int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller);
+int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller);
+void rcommonInitCmdBuf(radeonContextPtr rmesa);
+void rcommonDestroyCmdBuf(radeonContextPtr rmesa);
+
+void rcommonBeginBatch(radeonContextPtr rmesa,
+		       int n,
+		       int dostate,
+		       const char *file,
+		       const char *function,
+		       int line);
+
+/* +r6/r7 : code here moved */
+
+#define CP_PACKET2  (2 << 30)
+#define CP_PACKET0(reg, n)	(RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2))
+#define CP_PACKET0_ONE(reg, n)	(RADEON_CP_PACKET0 | RADEON_CP_PACKET0_ONE_REG_WR | ((n)<<16) | ((reg)>>2))
+#define CP_PACKET3(pkt, n)	(RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
+
+/**
+ * Every function writing to the command buffer needs to declare this
+ * to get the necessary local variables.
+ */
+#define BATCH_LOCALS(rmesa) \
+	const radeonContextPtr b_l_rmesa = rmesa
+
+/**
+ * Prepare writing n dwords to the command buffer,
+ * including producing any necessary state emits on buffer wraparound.
+ */
+#define BEGIN_BATCH(n) rcommonBeginBatch(b_l_rmesa, n, 1, __FILE__, __FUNCTION__, __LINE__)
+
+/**
+ * Same as BEGIN_BATCH, but do not cause automatic state emits.
+ */
+#define BEGIN_BATCH_NO_AUTOSTATE(n) rcommonBeginBatch(b_l_rmesa, n, 0, __FILE__, __FUNCTION__, __LINE__)
+
+/**
+ * Write one dword to the command buffer.
+ */
+#define OUT_BATCH(data) \
+	do { \
+        radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, data);\
+	} while(0)
+
+/**
+ * Write a relocated dword to the command buffer.
+ */
+#define OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags) 	\
+	do { 							\
+	int  __offset = (offset);				\
+        if (0 && __offset) {					\
+            fprintf(stderr, "(%s:%s:%d) offset : %d\n",		\
+            __FILE__, __FUNCTION__, __LINE__, __offset);	\
+        }							\
+        radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, __offset);	\
+        radeon_cs_write_reloc(b_l_rmesa->cmdbuf.cs, 		\
+                              bo, rd, wd, flags);		\
+	if (!b_l_rmesa->radeonScreen->kernel_mm) 		\
+		b_l_rmesa->cmdbuf.cs->section_cdw += 2;		\
+	} while(0)
+
+
+/**
+ * Write n dwords from ptr to the command buffer.
+ */
+#define OUT_BATCH_TABLE(ptr,n) \
+	do { \
+		radeon_cs_write_table(b_l_rmesa->cmdbuf.cs, (ptr), (n));\
+	} while(0)
+
+/**
+ * Finish writing dwords to the command buffer.
+ * The number of (direct or indirect) OUT_BATCH calls between the previous
+ * BEGIN_BATCH and END_BATCH must match the number specified at BEGIN_BATCH time.
+ */
+#define END_BATCH() \
+	do { \
+        radeon_cs_end(b_l_rmesa->cmdbuf.cs, __FILE__, __FUNCTION__, __LINE__);\
+	} while(0)
+
+/**
+ * After the last END_BATCH() of rendering, this indicates that flushing
+ * the command buffer now is okay.
+ */
+#define COMMIT_BATCH() \
+	do { \
+	} while(0)
+
+
+/** Single register write to command buffer; requires 2 dwords. */
+#define OUT_BATCH_REGVAL(reg, val) \
+	OUT_BATCH(cmdpacket0(b_l_rmesa->radeonScreen, (reg), 1)); \
+	OUT_BATCH((val))
+
+/** Continuous register range write to command buffer; requires 1 dword,
+ * expects count dwords afterwards for register contents. */
+#define OUT_BATCH_REGSEQ(reg, count) \
+	OUT_BATCH(cmdpacket0(b_l_rmesa->radeonScreen, (reg), (count)))
+
+/** Write a 32 bit float to the ring; requires 1 dword. */
+#define OUT_BATCH_FLOAT32(f) \
+	OUT_BATCH(radeonPackFloat32((f)))
+
+/* +r6/r7 : code here moved */
+
+/* Fire the buffered vertices no matter what.
+ */
+static INLINE void radeon_firevertices(radeonContextPtr radeon)
+{
+   if (radeon->cmdbuf.cs->cdw || radeon->dma.flush )
+      radeon->glCtx->Driver.Flush(radeon->glCtx); /* +r6/r7 */
+}
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
new file mode 100644
index 00000000000..a4c7b40798a
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -0,0 +1,1349 @@
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/*
+   - Scissor implementation
+   - buffer swap/copy ioctls
+   - finish/flush
+   - state emission
+   - cmdbuffer management
+*/
+
+#include <errno.h>
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "drivers/common/meta.h"
+
+#include "vblank.h"
+
+#include "radeon_common.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_lock.h"
+#include "radeon_drm.h"
+#include "radeon_queryobj.h"
+
+/**
+ * Enable verbose debug output for emit code.
+ * 0 no output
+ * 1 most output
+ * 2 also print state alues
+ */
+#define RADEON_CMDBUF         0
+
+/* =============================================================
+ * Scissoring
+ */
+
+static GLboolean intersect_rect(drm_clip_rect_t * out,
+				drm_clip_rect_t * a, drm_clip_rect_t * b)
+{
+	*out = *a;
+	if (b->x1 > out->x1)
+		out->x1 = b->x1;
+	if (b->y1 > out->y1)
+		out->y1 = b->y1;
+	if (b->x2 < out->x2)
+		out->x2 = b->x2;
+	if (b->y2 < out->y2)
+		out->y2 = b->y2;
+	if (out->x1 >= out->x2)
+		return GL_FALSE;
+	if (out->y1 >= out->y2)
+		return GL_FALSE;
+	return GL_TRUE;
+}
+
+void radeonRecalcScissorRects(radeonContextPtr radeon)
+{
+	drm_clip_rect_t *out;
+	int i;
+
+	/* Grow cliprect store?
+	 */
+	if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) {
+		while (radeon->state.scissor.numAllocedClipRects <
+		       radeon->numClipRects) {
+			radeon->state.scissor.numAllocedClipRects += 1;	/* zero case */
+			radeon->state.scissor.numAllocedClipRects *= 2;
+		}
+
+		if (radeon->state.scissor.pClipRects)
+			FREE(radeon->state.scissor.pClipRects);
+
+		radeon->state.scissor.pClipRects =
+			MALLOC(radeon->state.scissor.numAllocedClipRects *
+			       sizeof(drm_clip_rect_t));
+
+		if (radeon->state.scissor.pClipRects == NULL) {
+			radeon->state.scissor.numAllocedClipRects = 0;
+			return;
+		}
+	}
+
+	out = radeon->state.scissor.pClipRects;
+	radeon->state.scissor.numClipRects = 0;
+
+	for (i = 0; i < radeon->numClipRects; i++) {
+		if (intersect_rect(out,
+				   &radeon->pClipRects[i],
+				   &radeon->state.scissor.rect)) {
+			radeon->state.scissor.numClipRects++;
+			out++;
+		}
+	}
+
+	if (radeon->vtbl.update_scissor)
+	   radeon->vtbl.update_scissor(radeon->glCtx);
+}
+
+void radeon_get_cliprects(radeonContextPtr radeon,
+			  struct drm_clip_rect **cliprects,
+			  unsigned int *num_cliprects,
+			  int *x_off, int *y_off)
+{
+	__DRIdrawablePrivate *dPriv = radeon_get_drawable(radeon);
+	struct radeon_framebuffer *rfb = dPriv->driverPrivate;
+
+	if (radeon->constant_cliprect) {
+		radeon->fboRect.x1 = 0;
+		radeon->fboRect.y1 = 0;
+		radeon->fboRect.x2 = radeon->glCtx->DrawBuffer->Width;
+		radeon->fboRect.y2 = radeon->glCtx->DrawBuffer->Height;
+
+		*cliprects = &radeon->fboRect;
+		*num_cliprects = 1;
+		*x_off = 0;
+		*y_off = 0;
+	} else if (radeon->front_cliprects ||
+		   rfb->pf_active || dPriv->numBackClipRects == 0) {
+		*cliprects = dPriv->pClipRects;
+		*num_cliprects = dPriv->numClipRects;
+		*x_off = dPriv->x;
+		*y_off = dPriv->y;
+	} else {
+		*num_cliprects = dPriv->numBackClipRects;
+		*cliprects = dPriv->pBackClipRects;
+		*x_off = dPriv->backX;
+		*y_off = dPriv->backY;
+	}
+}
+
+/**
+ * Update cliprects and scissors.
+ */
+void radeonSetCliprects(radeonContextPtr radeon)
+{
+	__DRIdrawablePrivate *const drawable = radeon_get_drawable(radeon);
+	__DRIdrawablePrivate *const readable = radeon_get_readable(radeon);
+	struct radeon_framebuffer *const draw_rfb = drawable->driverPrivate;
+	struct radeon_framebuffer *const read_rfb = readable->driverPrivate;
+	int x_off, y_off;
+
+	radeon_get_cliprects(radeon, &radeon->pClipRects,
+			     &radeon->numClipRects, &x_off, &y_off);
+
+	if ((draw_rfb->base.Width != drawable->w) ||
+	    (draw_rfb->base.Height != drawable->h)) {
+		_mesa_resize_framebuffer(radeon->glCtx, &draw_rfb->base,
+					 drawable->w, drawable->h);
+		draw_rfb->base.Initialized = GL_TRUE;
+	}
+
+	if (drawable != readable) {
+		if ((read_rfb->base.Width != readable->w) ||
+		    (read_rfb->base.Height != readable->h)) {
+			_mesa_resize_framebuffer(radeon->glCtx, &read_rfb->base,
+						 readable->w, readable->h);
+			read_rfb->base.Initialized = GL_TRUE;
+		}
+	}
+
+	if (radeon->state.scissor.enabled)
+		radeonRecalcScissorRects(radeon);
+
+}
+
+
+
+void radeonUpdateScissor( GLcontext *ctx )
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	GLint x = ctx->Scissor.X, y = ctx->Scissor.Y;
+	GLsizei w = ctx->Scissor.Width, h = ctx->Scissor.Height;
+	int x1, y1, x2, y2;
+	int min_x, min_y, max_x, max_y;
+
+	if (!ctx->DrawBuffer)
+	    return;
+	min_x = min_y = 0;
+	max_x = ctx->DrawBuffer->Width - 1;
+	max_y = ctx->DrawBuffer->Height - 1;
+
+	if ( !ctx->DrawBuffer->Name ) {
+		x1 = x;
+		y1 = ctx->DrawBuffer->Height - (y + h);
+		x2 = x + w - 1;
+		y2 = y1 + h - 1;
+	} else {
+		x1 = x;
+		y1 = y;
+		x2 = x + w - 1;
+		y2 = y + h - 1;
+
+	}
+	if (!rmesa->radeonScreen->kernel_mm) {
+	   /* Fix scissors for dri 1 */
+
+	   __DRIdrawablePrivate *dPriv = radeon_get_drawable(rmesa);
+	   x1 += dPriv->x;
+	   x2 += dPriv->x + 1;
+	   min_x += dPriv->x;
+	   max_x += dPriv->x + 1;
+	   y1 += dPriv->y;
+	   y2 += dPriv->y + 1;
+	   min_y += dPriv->y;
+	   max_y += dPriv->y + 1;
+	}
+
+	rmesa->state.scissor.rect.x1 = CLAMP(x1,  min_x, max_x);
+	rmesa->state.scissor.rect.y1 = CLAMP(y1,  min_y, max_y);
+	rmesa->state.scissor.rect.x2 = CLAMP(x2,  min_x, max_x);
+	rmesa->state.scissor.rect.y2 = CLAMP(y2,  min_y, max_y);
+
+	radeonRecalcScissorRects( rmesa );
+}
+
+/* =============================================================
+ * Scissoring
+ */
+
+void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	if (ctx->Scissor.Enabled) {
+		/* We don't pipeline cliprect changes */
+		radeon_firevertices(radeon);
+		radeonUpdateScissor(ctx);
+	}
+}
+
+void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask )
+{
+   radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+   GLuint i;
+   drm_radeon_stipple_t stipple;
+
+   /* Must flip pattern upside down.
+   */
+   for ( i = 0 ; i < 32 ; i++ ) {
+      stipple.mask[31 - i] = ((GLuint *) mask)[i];
+   }
+
+   /* TODO: push this into cmd mechanism
+   */
+   radeon_firevertices(radeon);
+   LOCK_HARDWARE( radeon );
+
+   drmCommandWrite( radeon->dri.fd, DRM_RADEON_STIPPLE,
+	 &stipple, sizeof(stipple) );
+   UNLOCK_HARDWARE( radeon );
+}
+
+
+/* ================================================================
+ * SwapBuffers with client-side throttling
+ */
+
+static uint32_t radeonGetLastFrame(radeonContextPtr radeon)
+{
+	drm_radeon_getparam_t gp;
+	int ret;
+	uint32_t frame = 0;
+
+	gp.param = RADEON_PARAM_LAST_FRAME;
+	gp.value = (int *)&frame;
+	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+				  &gp, sizeof(gp));
+	if (ret) {
+		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
+			ret);
+		exit(1);
+	}
+
+	return frame;
+}
+
+uint32_t radeonGetAge(radeonContextPtr radeon)
+{
+	drm_radeon_getparam_t gp;
+	int ret;
+	uint32_t age;
+
+	gp.param = RADEON_PARAM_LAST_CLEAR;
+	gp.value = (int *)&age;
+	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+				  &gp, sizeof(gp));
+	if (ret) {
+		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
+			ret);
+		exit(1);
+	}
+
+	return age;
+}
+
+static void radeonEmitIrqLocked(radeonContextPtr radeon)
+{
+	drm_radeon_irq_emit_t ie;
+	int ret;
+
+	ie.irq_seq = &radeon->iw.irq_seq;
+	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT,
+				  &ie, sizeof(ie));
+	if (ret) {
+		fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__,
+			ret);
+		exit(1);
+	}
+}
+
+static void radeonWaitIrq(radeonContextPtr radeon)
+{
+	int ret;
+
+	do {
+		ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT,
+				      &radeon->iw, sizeof(radeon->iw));
+	} while (ret && (errno == EINTR || errno == EBUSY));
+
+	if (ret) {
+		fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__,
+			ret);
+		exit(1);
+	}
+}
+
+static void radeonWaitForFrameCompletion(radeonContextPtr radeon)
+{
+	drm_radeon_sarea_t *sarea = radeon->sarea;
+
+	if (radeon->do_irqs) {
+		if (radeonGetLastFrame(radeon) < sarea->last_frame) {
+			if (!radeon->irqsEmitted) {
+				while (radeonGetLastFrame(radeon) <
+				       sarea->last_frame) ;
+			} else {
+				UNLOCK_HARDWARE(radeon);
+				radeonWaitIrq(radeon);
+				LOCK_HARDWARE(radeon);
+			}
+			radeon->irqsEmitted = 10;
+		}
+
+		if (radeon->irqsEmitted) {
+			radeonEmitIrqLocked(radeon);
+			radeon->irqsEmitted--;
+		}
+	} else {
+		while (radeonGetLastFrame(radeon) < sarea->last_frame) {
+			UNLOCK_HARDWARE(radeon);
+			if (radeon->do_usleeps)
+				DO_USLEEP(1);
+			LOCK_HARDWARE(radeon);
+		}
+	}
+}
+
+/* wait for idle */
+void radeonWaitForIdleLocked(radeonContextPtr radeon)
+{
+	int ret;
+	int i = 0;
+
+	do {
+		ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE);
+		if (ret)
+			DO_USLEEP(1);
+	} while (ret && ++i < 100);
+
+	if (ret < 0) {
+		UNLOCK_HARDWARE(radeon);
+		fprintf(stderr, "Error: R300 timed out... exiting\n");
+		exit(-1);
+	}
+}
+
+static void radeonWaitForIdle(radeonContextPtr radeon)
+{
+	if (!radeon->radeonScreen->driScreen->dri2.enabled) {
+        LOCK_HARDWARE(radeon);
+	    radeonWaitForIdleLocked(radeon);
+	    UNLOCK_HARDWARE(radeon);
+    }
+}
+
+static void radeon_flip_renderbuffers(struct radeon_framebuffer *rfb)
+{
+	int current_page = rfb->pf_current_page;
+	int next_page = (current_page + 1) % rfb->pf_num_pages;
+	struct gl_renderbuffer *tmp_rb;
+
+	/* Exchange renderbuffers if necessary but make sure their
+	 * reference counts are preserved.
+	 */
+	if (rfb->color_rb[current_page] &&
+	    rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer !=
+	    &rfb->color_rb[current_page]->base) {
+		tmp_rb = NULL;
+		_mesa_reference_renderbuffer(&tmp_rb,
+					     rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
+		tmp_rb = &rfb->color_rb[current_page]->base;
+		_mesa_reference_renderbuffer(&rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer, tmp_rb);
+		_mesa_reference_renderbuffer(&tmp_rb, NULL);
+	}
+
+	if (rfb->color_rb[next_page] &&
+	    rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer !=
+	    &rfb->color_rb[next_page]->base) {
+		tmp_rb = NULL;
+		_mesa_reference_renderbuffer(&tmp_rb,
+					     rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer);
+		tmp_rb = &rfb->color_rb[next_page]->base;
+		_mesa_reference_renderbuffer(&rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer, tmp_rb);
+		_mesa_reference_renderbuffer(&tmp_rb, NULL);
+	}
+}
+
+/* Copy the back color buffer to the front color buffer.
+ */
+void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
+		       const drm_clip_rect_t	  *rect)
+{
+	radeonContextPtr rmesa;
+	struct radeon_framebuffer *rfb;
+	GLint nbox, i, ret;
+
+	assert(dPriv);
+	assert(dPriv->driContextPriv);
+	assert(dPriv->driContextPriv->driverPrivate);
+
+	rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+
+	LOCK_HARDWARE(rmesa);
+
+	rfb = dPriv->driverPrivate;
+
+	if ( RADEON_DEBUG & RADEON_IOCTL ) {
+		fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
+	}
+
+	nbox = dPriv->numClipRects; /* must be in locked region */
+
+	for ( i = 0 ; i < nbox ; ) {
+		GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
+		drm_clip_rect_t *box = dPriv->pClipRects;
+		drm_clip_rect_t *b = rmesa->sarea->boxes;
+		GLint n = 0;
+
+		for ( ; i < nr ; i++ ) {
+
+			*b = box[i];
+
+			if (rect)
+			{
+				if (rect->x1 > b->x1)
+					b->x1 = rect->x1;
+				if (rect->y1 > b->y1)
+					b->y1 = rect->y1;
+				if (rect->x2 < b->x2)
+					b->x2 = rect->x2;
+				if (rect->y2 < b->y2)
+					b->y2 = rect->y2;
+
+				if (b->x1 >= b->x2 || b->y1 >= b->y2)
+					continue;
+			}
+
+			b++;
+			n++;
+		}
+		rmesa->sarea->nbox = n;
+
+		if (!n)
+			continue;
+
+		ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
+
+		if ( ret ) {
+			fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
+			UNLOCK_HARDWARE( rmesa );
+			exit( 1 );
+		}
+	}
+
+	UNLOCK_HARDWARE( rmesa );
+}
+
+static int radeonScheduleSwap(__DRIdrawablePrivate *dPriv, GLboolean *missed_target)
+{
+	radeonContextPtr rmesa;
+
+	rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+	radeon_firevertices(rmesa);
+
+	LOCK_HARDWARE( rmesa );
+
+	if (!dPriv->numClipRects) {
+		UNLOCK_HARDWARE(rmesa);
+		usleep(10000);	/* throttle invisible client 10ms */
+		return 0;
+	}
+
+	radeonWaitForFrameCompletion(rmesa);
+
+	UNLOCK_HARDWARE(rmesa);
+	driWaitForVBlank(dPriv, missed_target);
+
+	return 0;
+}
+
+static GLboolean radeonPageFlip( __DRIdrawablePrivate *dPriv )
+{
+	radeonContextPtr radeon;
+	GLint ret;
+	__DRIscreenPrivate *psp;
+	struct radeon_renderbuffer *rrb;
+	struct radeon_framebuffer *rfb;
+
+	assert(dPriv);
+	assert(dPriv->driContextPriv);
+	assert(dPriv->driContextPriv->driverPrivate);
+
+	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+	rfb = dPriv->driverPrivate;
+	rrb = (void *)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+
+	psp = dPriv->driScreenPriv;
+
+	LOCK_HARDWARE(radeon);
+
+	if ( RADEON_DEBUG & RADEON_IOCTL ) {
+		fprintf(stderr, "%s: pfCurrentPage: %d %d\n", __FUNCTION__,
+			radeon->sarea->pfCurrentPage, radeon->sarea->pfState);
+	}
+	drm_clip_rect_t *box = dPriv->pClipRects;
+	drm_clip_rect_t *b = radeon->sarea->boxes;
+	b[0] = box[0];
+	radeon->sarea->nbox = 1;
+
+	ret = drmCommandNone( radeon->dri.fd, DRM_RADEON_FLIP );
+
+	UNLOCK_HARDWARE(radeon);
+
+	if ( ret ) {
+		fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
+		return GL_FALSE;
+	}
+
+	if (!rfb->pf_active)
+		return GL_FALSE;
+
+	rfb->pf_current_page = radeon->sarea->pfCurrentPage;
+	radeon_flip_renderbuffers(rfb);
+	radeon_draw_buffer(radeon->glCtx, &rfb->base);
+
+	return GL_TRUE;
+}
+
+
+/**
+ * Swap front and back buffer.
+ */
+void radeonSwapBuffers(__DRIdrawablePrivate * dPriv)
+{
+	int64_t ust;
+	__DRIscreenPrivate *psp;
+
+	if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+		radeonContextPtr radeon;
+		GLcontext *ctx;
+
+		radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+		ctx = radeon->glCtx;
+
+		if (ctx->Visual.doubleBufferMode) {
+			GLboolean missed_target;
+			struct radeon_framebuffer *rfb = dPriv->driverPrivate;
+			_mesa_notifySwapBuffers(ctx);/* flush pending rendering comands */
+
+			radeonScheduleSwap(dPriv, &missed_target);
+
+			if (rfb->pf_active) {
+				radeonPageFlip(dPriv);
+			} else {
+				radeonCopyBuffer(dPriv, NULL);
+			}
+
+			psp = dPriv->driScreenPriv;
+
+			rfb->swap_count++;
+			(*psp->systemTime->getUST)( & ust );
+			if ( missed_target ) {
+				rfb->swap_missed_count++;
+				rfb->swap_missed_ust = ust - rfb->swap_ust;
+			}
+
+			rfb->swap_ust = ust;
+			radeon->hw.all_dirty = GL_TRUE;
+		}
+	} else {
+		/* XXX this shouldn't be an error but we can't handle it for now */
+		_mesa_problem(NULL, "%s: drawable has no context!",
+			      __FUNCTION__);
+	}
+}
+
+void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+			 int x, int y, int w, int h )
+{
+	if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+		radeonContextPtr radeon;
+		GLcontext *ctx;
+
+		radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+		ctx = radeon->glCtx;
+
+		if (ctx->Visual.doubleBufferMode) {
+			drm_clip_rect_t rect;
+			rect.x1 = x + dPriv->x;
+			rect.y1 = (dPriv->h - y - h) + dPriv->y;
+			rect.x2 = rect.x1 + w;
+			rect.y2 = rect.y1 + h;
+			_mesa_notifySwapBuffers(ctx);	/* flush pending rendering comands */
+			radeonCopyBuffer(dPriv, &rect);
+		}
+	} else {
+		/* XXX this shouldn't be an error but we can't handle it for now */
+		_mesa_problem(NULL, "%s: drawable has no context!",
+			      __FUNCTION__);
+	}
+}
+
+void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	struct radeon_renderbuffer *rrbDepth = NULL, *rrbStencil = NULL,
+		*rrbColor = NULL;
+	uint32_t offset = 0;
+
+
+	if (!fb) {
+		/* this can happen during the initial context initialization */
+		return;
+	}
+
+	/* radeons only handle 1 color draw so far */
+	if (fb->_NumColorDrawBuffers != 1) {
+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE);
+		return;
+	}
+
+	/* Do this here, note core Mesa, since this function is called from
+	 * many places within the driver.
+	 */
+	if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+		/* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
+		_mesa_update_framebuffer(ctx);
+		/* this updates the DrawBuffer's Width/Height if it's a FBO */
+		_mesa_update_draw_buffer_bounds(ctx);
+	}
+
+	if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
+		/* this may occur when we're called by glBindFrameBuffer() during
+		 * the process of someone setting up renderbuffers, etc.
+		 */
+		/*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/
+		return;
+	}
+
+	if (fb->Name)
+		;/* do something depthy/stencily TODO */
+
+
+		/* none */
+	if (fb->Name == 0) {
+		if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+			rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
+			radeon->front_cliprects = GL_TRUE;
+			radeon->front_buffer_dirty = GL_TRUE;
+		} else {
+			rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer);
+			radeon->front_cliprects = GL_FALSE;
+		}
+	} else {
+		/* user FBO in theory */
+		struct radeon_renderbuffer *rrb;
+		rrb = radeon_renderbuffer(fb->_ColorDrawBuffers[0]);
+		if (rrb) {
+			offset = rrb->draw_offset;
+			rrbColor = rrb;
+		}
+		radeon->constant_cliprect = GL_TRUE;
+	}
+
+	if (rrbColor == NULL)
+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE);
+	else
+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE);
+
+
+	if (fb->_DepthBuffer && fb->_DepthBuffer->Wrapped) {
+		rrbDepth = radeon_renderbuffer(fb->_DepthBuffer->Wrapped);
+		if (rrbDepth && rrbDepth->bo) {
+			radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_FALSE);
+		} else {
+			radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_TRUE);
+		}
+	} else {
+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_FALSE);
+		rrbDepth = NULL;
+	}
+
+	if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) {
+		rrbStencil = radeon_renderbuffer(fb->_StencilBuffer->Wrapped);
+		if (rrbStencil && rrbStencil->bo) {
+			radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_FALSE);
+			/* need to re-compute stencil hw state */
+			if (!rrbDepth)
+				rrbDepth = rrbStencil;
+		} else {
+			radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_TRUE);
+		}
+	} else {
+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_FALSE);
+		if (ctx->Driver.Enable != NULL)
+			ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+		else
+			ctx->NewState |= _NEW_STENCIL;
+	}
+
+	/* Update culling direction which changes depending on the
+	 * orientation of the buffer:
+	 */
+	if (ctx->Driver.FrontFace)
+		ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
+	else
+		ctx->NewState |= _NEW_POLYGON;
+
+	/*
+	 * Update depth test state
+	 */
+	if (ctx->Driver.Enable) {
+		ctx->Driver.Enable(ctx, GL_DEPTH_TEST,
+				   (ctx->Depth.Test && fb->Visual.depthBits > 0));
+		/* Need to update the derived ctx->Stencil._Enabled first */
+		ctx->Driver.Enable(ctx, GL_STENCIL_TEST,
+				   (ctx->Stencil.Enabled && fb->Visual.stencilBits > 0));
+	} else {
+		ctx->NewState |= (_NEW_DEPTH | _NEW_STENCIL);
+	}
+
+	_mesa_reference_renderbuffer(&radeon->state.depth.rb, &rrbDepth->base);
+	_mesa_reference_renderbuffer(&radeon->state.color.rb, &rrbColor->base);
+	radeon->state.color.draw_offset = offset;
+
+#if 0
+	/* update viewport since it depends on window size */
+	if (ctx->Driver.Viewport) {
+		ctx->Driver.Viewport(ctx, ctx->Viewport.X, ctx->Viewport.Y,
+				     ctx->Viewport.Width, ctx->Viewport.Height);
+	} else {
+
+	}
+#endif
+	ctx->NewState |= _NEW_VIEWPORT;
+
+	/* Set state we know depends on drawable parameters:
+	 */
+	radeonUpdateScissor(ctx);
+	radeon->NewGLState |= _NEW_SCISSOR;
+
+	if (ctx->Driver.DepthRange)
+		ctx->Driver.DepthRange(ctx,
+				       ctx->Viewport.Near,
+				       ctx->Viewport.Far);
+
+	/* Update culling direction which changes depending on the
+	 * orientation of the buffer:
+	 */
+	if (ctx->Driver.FrontFace)
+		ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
+	else
+		ctx->NewState |= _NEW_POLYGON;
+}
+
+/**
+ * Called via glDrawBuffer.
+ */
+void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
+{
+	if (RADEON_DEBUG & RADEON_DRI)
+		fprintf(stderr, "%s %s\n", __FUNCTION__,
+			_mesa_lookup_enum_by_nr( mode ));
+
+	if (ctx->DrawBuffer->Name == 0) {
+		radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+
+		const GLboolean was_front_buffer_rendering =
+			radeon->is_front_buffer_rendering;
+
+		radeon->is_front_buffer_rendering = (mode == GL_FRONT_LEFT) ||
+                                            (mode == GL_FRONT);
+
+      /* If we weren't front-buffer rendering before but we are now, make sure
+       * that the front-buffer has actually been allocated.
+       */
+		if (!was_front_buffer_rendering && radeon->is_front_buffer_rendering) {
+			radeon_update_renderbuffers(radeon->dri.context,
+				radeon->dri.context->driDrawablePriv);
+      }
+	}
+
+	radeon_draw_buffer(ctx, ctx->DrawBuffer);
+}
+
+void radeonReadBuffer( GLcontext *ctx, GLenum mode )
+{
+	if ((ctx->DrawBuffer != NULL) && (ctx->DrawBuffer->Name == 0)) {
+		struct radeon_context *const rmesa = RADEON_CONTEXT(ctx);
+		const GLboolean was_front_buffer_reading = rmesa->is_front_buffer_reading;
+		rmesa->is_front_buffer_reading = (mode == GL_FRONT_LEFT)
+					|| (mode == GL_FRONT);
+
+		if (!was_front_buffer_reading && rmesa->is_front_buffer_reading) {
+			radeon_update_renderbuffers(rmesa->dri.context,
+						    rmesa->dri.context->driReadablePriv);
+	 	}
+	}
+	/* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
+	if (ctx->ReadBuffer == ctx->DrawBuffer) {
+		/* This will update FBO completeness status.
+		 * A framebuffer will be incomplete if the GL_READ_BUFFER setting
+		 * refers to a missing renderbuffer.  Calling glReadBuffer can set
+		 * that straight and can make the drawing buffer complete.
+		 */
+		radeon_draw_buffer(ctx, ctx->DrawBuffer);
+	}
+}
+
+
+/* Turn on/off page flipping according to the flags in the sarea:
+ */
+void radeonUpdatePageFlipping(radeonContextPtr radeon)
+{
+	struct radeon_framebuffer *rfb = radeon_get_drawable(radeon)->driverPrivate;
+
+	rfb->pf_active = radeon->sarea->pfState;
+	rfb->pf_current_page = radeon->sarea->pfCurrentPage;
+	rfb->pf_num_pages = 2;
+	radeon_flip_renderbuffers(rfb);
+	radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer);
+}
+
+void radeon_window_moved(radeonContextPtr radeon)
+{
+	/* Cliprects has to be updated before doing anything else */
+	radeonSetCliprects(radeon);
+	if (!radeon->radeonScreen->driScreen->dri2.enabled) {
+		radeonUpdatePageFlipping(radeon);
+	}
+}
+
+void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	__DRIcontext *driContext = radeon->dri.context;
+	void (*old_viewport)(GLcontext *ctx, GLint x, GLint y,
+			     GLsizei w, GLsizei h);
+
+	if (!driContext->driScreenPriv->dri2.enabled)
+		return;
+
+	if (!radeon->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) {
+		if (radeon->is_front_buffer_rendering) {
+			ctx->Driver.Flush(ctx);
+		}
+		radeon_update_renderbuffers(driContext, driContext->driDrawablePriv);
+		if (driContext->driDrawablePriv != driContext->driReadablePriv)
+			radeon_update_renderbuffers(driContext, driContext->driReadablePriv);
+	}
+
+	old_viewport = ctx->Driver.Viewport;
+	ctx->Driver.Viewport = NULL;
+	radeon_window_moved(radeon);
+	radeon_draw_buffer(ctx, radeon->glCtx->DrawBuffer);
+	ctx->Driver.Viewport = old_viewport;
+}
+
+static void radeon_print_state_atom_prekmm(radeonContextPtr radeon, struct radeon_state_atom *state)
+{
+	int i, j, reg;
+	int dwords = (*state->check) (radeon->glCtx, state);
+	drm_r300_cmd_header_t cmd;
+
+	fprintf(stderr, "  emit %s %d/%d\n", state->name, dwords, state->cmd_size);
+
+	if (radeon_is_debug_enabled(RADEON_STATE, RADEON_TRACE)) {
+		if (dwords > state->cmd_size)
+			dwords = state->cmd_size;
+
+		for (i = 0; i < dwords;) {
+			cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]);
+			reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo;
+			fprintf(stderr, "      %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n",
+					state->name, i, reg, cmd.packet0.count);
+			++i;
+			for (j = 0; j < cmd.packet0.count && i < dwords; j++) {
+				fprintf(stderr, "      %s[%d]: 0x%04x = %08x\n",
+						state->name, i, reg, state->cmd[i]);
+				reg += 4;
+				++i;
+			}
+		}
+	}
+}
+
+static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state)
+{
+	int i, j, reg, count;
+	int dwords;
+	uint32_t packet0;
+	if (!radeon_is_debug_enabled(RADEON_STATE, RADEON_VERBOSE) )
+		return;
+
+	if (!radeon->radeonScreen->kernel_mm) {
+		radeon_print_state_atom_prekmm(radeon, state);
+		return;
+	}
+
+	dwords = (*state->check) (radeon->glCtx, state);
+
+	fprintf(stderr, "  emit %s %d/%d\n", state->name, dwords, state->cmd_size);
+
+	if (radeon_is_debug_enabled(RADEON_STATE, RADEON_TRACE)) {
+		if (dwords > state->cmd_size)
+			dwords = state->cmd_size;
+		for (i = 0; i < dwords;) {
+			packet0 = state->cmd[i];
+			reg = (packet0 & 0x1FFF) << 2;
+			count = ((packet0 & 0x3FFF0000) >> 16) + 1;
+			fprintf(stderr, "      %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n",
+					state->name, i, reg, count);
+			++i;
+			for (j = 0; j < count && i < dwords; j++) {
+				fprintf(stderr, "      %s[%d]: 0x%04x = %08x\n",
+						state->name, i, reg, state->cmd[i]);
+				reg += 4;
+				++i;
+			}
+		}
+	}
+}
+
+/**
+ * Count total size for next state emit.
+ **/
+GLuint radeonCountStateEmitSize(radeonContextPtr radeon)
+{
+	struct radeon_state_atom *atom;
+	GLuint dwords = 0;
+	/* check if we are going to emit full state */
+
+	if (radeon->cmdbuf.cs->cdw && !radeon->hw.all_dirty) {
+		if (!radeon->hw.is_dirty)
+			goto out;
+		foreach(atom, &radeon->hw.atomlist) {
+			if (atom->dirty) {
+				const GLuint atom_size = atom->check(radeon->glCtx, atom);
+				dwords += atom_size;
+				if (RADEON_CMDBUF && atom_size) {
+					radeon_print_state_atom(radeon, atom);
+				}
+			}
+		}
+	} else {
+		foreach(atom, &radeon->hw.atomlist) {
+			const GLuint atom_size = atom->check(radeon->glCtx, atom);
+			dwords += atom_size;
+			if (RADEON_CMDBUF && atom_size) {
+				radeon_print_state_atom(radeon, atom);
+			}
+
+		}
+	}
+out:
+	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %u\n", __func__, dwords);
+	return dwords;
+}
+
+static INLINE void radeon_emit_atom(radeonContextPtr radeon, struct radeon_state_atom *atom)
+{
+	BATCH_LOCALS(radeon);
+	int dwords;
+
+	dwords = (*atom->check) (radeon->glCtx, atom);
+	if (dwords) {
+
+		radeon_print_state_atom(radeon, atom);
+
+		if (atom->emit) {
+			(*atom->emit)(radeon->glCtx, atom);
+		} else {
+			BEGIN_BATCH_NO_AUTOSTATE(dwords);
+			OUT_BATCH_TABLE(atom->cmd, dwords);
+			END_BATCH();
+		}
+	} else {
+		radeon_print(RADEON_STATE, RADEON_VERBOSE, "  skip state %s\n", atom->name);
+	}
+	atom->dirty = GL_FALSE;
+
+}
+
+static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean emitAll)
+{
+	struct radeon_state_atom *atom;
+
+	if (radeon->vtbl.pre_emit_atoms)
+		radeon->vtbl.pre_emit_atoms(radeon);
+
+	/* Emit actual atoms */
+	if (radeon->hw.all_dirty || emitAll) {
+		foreach(atom, &radeon->hw.atomlist)
+			radeon_emit_atom( radeon, atom );
+	} else {
+		foreach(atom, &radeon->hw.atomlist) {
+			if ( atom->dirty )
+				radeon_emit_atom( radeon, atom );
+		}
+	}
+
+	COMMIT_BATCH();
+}
+
+static GLboolean radeon_revalidate_bos(GLcontext *ctx)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	int ret;
+
+	ret = radeon_cs_space_check(radeon->cmdbuf.cs);
+	if (ret == RADEON_CS_SPACE_FLUSH)
+		return GL_FALSE;
+	return GL_TRUE;
+}
+
+void radeonEmitState(radeonContextPtr radeon)
+{
+	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s\n", __FUNCTION__);
+
+	if (radeon->vtbl.pre_emit_state)
+		radeon->vtbl.pre_emit_state(radeon);
+
+	/* this code used to return here but now it emits zbs */
+	if (radeon->cmdbuf.cs->cdw && !radeon->hw.is_dirty && !radeon->hw.all_dirty)
+		return;
+
+	if (!radeon->cmdbuf.cs->cdw) {
+		if (RADEON_DEBUG & RADEON_STATE)
+			fprintf(stderr, "Begin reemit state\n");
+
+		radeonEmitAtoms(radeon, GL_TRUE);
+	} else {
+
+		if (RADEON_DEBUG & RADEON_STATE)
+			fprintf(stderr, "Begin dirty state\n");
+
+		radeonEmitAtoms(radeon, GL_FALSE);
+	}
+
+	radeon->hw.is_dirty = GL_FALSE;
+	radeon->hw.all_dirty = GL_FALSE;
+}
+
+
+void radeonFlush(GLcontext *ctx)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	if (RADEON_DEBUG & RADEON_IOCTL)
+		fprintf(stderr, "%s %d\n", __FUNCTION__, radeon->cmdbuf.cs->cdw);
+
+	/* okay if we have no cmds in the buffer &&
+	   we have no DMA flush &&
+	   we have no DMA buffer allocated.
+	   then no point flushing anything at all.
+	*/
+	if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && is_empty_list(&radeon->dma.reserved))
+		return;
+
+	if (radeon->dma.flush)
+		radeon->dma.flush( ctx );
+
+	radeonEmitState(radeon);
+
+	if (radeon->cmdbuf.cs->cdw)
+		rcommonFlushCmdBuf(radeon, __FUNCTION__);
+
+	if ((ctx->DrawBuffer->Name == 0) && radeon->front_buffer_dirty) {
+		__DRIscreen *const screen = radeon->radeonScreen->driScreen;
+
+		if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2)
+			&& (screen->dri2.loader->flushFrontBuffer != NULL)) {
+			__DRIdrawablePrivate * drawable = radeon_get_drawable(radeon);
+			(*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate);
+
+			/* Only clear the dirty bit if front-buffer rendering is no longer
+			 * enabled.  This is done so that the dirty bit can only be set in
+			 * glDrawBuffer.  Otherwise the dirty bit would have to be set at
+			 * each of N places that do rendering.  This has worse performances,
+			 * but it is much easier to get correct.
+			 */
+			if (!radeon->is_front_buffer_rendering) {
+				radeon->front_buffer_dirty = GL_FALSE;
+			}
+		}
+	}
+
+	make_empty_list(&radeon->query.not_flushed_head);
+
+}
+
+/* Make sure all commands have been sent to the hardware and have
+ * completed processing.
+ */
+void radeonFinish(GLcontext * ctx)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	struct gl_framebuffer *fb = ctx->DrawBuffer;
+	int i;
+
+	if (ctx->Driver.Flush)
+		ctx->Driver.Flush(ctx); /* +r6/r7 */
+
+	if (radeon->radeonScreen->kernel_mm) {
+		for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
+			struct radeon_renderbuffer *rrb;
+			rrb = radeon_renderbuffer(fb->_ColorDrawBuffers[i]);
+			if (rrb && rrb->bo)
+				radeon_bo_wait(rrb->bo);
+		}
+		{
+			struct radeon_renderbuffer *rrb;
+			rrb = radeon_get_depthbuffer(radeon);
+			if (rrb && rrb->bo)
+				radeon_bo_wait(rrb->bo);
+		}
+	} else if (radeon->do_irqs) {
+		LOCK_HARDWARE(radeon);
+		radeonEmitIrqLocked(radeon);
+		UNLOCK_HARDWARE(radeon);
+		radeonWaitIrq(radeon);
+	} else {
+		radeonWaitForIdle(radeon);
+	}
+}
+
+/* cmdbuffer */
+/**
+ * Send the current command buffer via ioctl to the hardware.
+ */
+int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller)
+{
+	int ret = 0;
+
+	if (rmesa->cmdbuf.flushing) {
+		fprintf(stderr, "Recursive call into r300FlushCmdBufLocked!\n");
+		exit(-1);
+	}
+	rmesa->cmdbuf.flushing = 1;
+
+	if (RADEON_DEBUG & RADEON_IOCTL) {
+		fprintf(stderr, "%s from %s - %i cliprects\n",
+			__FUNCTION__, caller, rmesa->numClipRects);
+	}
+
+	radeonEmitQueryEnd(rmesa->glCtx);
+
+	if (rmesa->cmdbuf.cs->cdw) {
+		ret = radeon_cs_emit(rmesa->cmdbuf.cs);
+		rmesa->hw.all_dirty = GL_TRUE;
+	}
+	radeon_cs_erase(rmesa->cmdbuf.cs);
+	rmesa->cmdbuf.flushing = 0;
+
+	if (radeon_revalidate_bos(rmesa->glCtx) == GL_FALSE) {
+		fprintf(stderr,"failed to revalidate buffers\n");
+	}
+
+	return ret;
+}
+
+int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller)
+{
+	int ret;
+
+	radeonReleaseDmaRegions(rmesa);
+
+	LOCK_HARDWARE(rmesa);
+	ret = rcommonFlushCmdBufLocked(rmesa, caller);
+	UNLOCK_HARDWARE(rmesa);
+
+	if (ret) {
+		fprintf(stderr, "drmRadeonCmdBuffer: %d. Kernel failed to "
+				"parse or rejected command stream. See dmesg "
+				"for more info.\n", ret);
+		_mesa_exit(ret);
+	}
+
+	return ret;
+}
+
+/**
+ * Make sure that enough space is available in the command buffer
+ * by flushing if necessary.
+ *
+ * \param dwords The number of dwords we need to be free on the command buffer
+ */
+GLboolean rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller)
+{
+   if ((rmesa->cmdbuf.cs->cdw + dwords + 128) > rmesa->cmdbuf.size
+	 || radeon_cs_need_flush(rmesa->cmdbuf.cs)) {
+      /* If we try to flush empty buffer there is too big rendering operation. */
+      assert(rmesa->cmdbuf.cs->cdw);
+      rcommonFlushCmdBuf(rmesa, caller);
+      return GL_TRUE;
+   }
+   return GL_FALSE;
+}
+
+void rcommonInitCmdBuf(radeonContextPtr rmesa)
+{
+	GLuint size;
+	/* Initialize command buffer */
+	size = 256 * driQueryOptioni(&rmesa->optionCache,
+				     "command_buffer_size");
+	if (size < 2 * rmesa->hw.max_state_size) {
+		size = 2 * rmesa->hw.max_state_size + 65535;
+	}
+	if (size > 64 * 256)
+		size = 64 * 256;
+
+	radeon_print(RADEON_CS, RADEON_VERBOSE,
+			"sizeof(drm_r300_cmd_header_t)=%zd\n", sizeof(drm_r300_cmd_header_t));
+	radeon_print(RADEON_CS, RADEON_VERBOSE,
+			"sizeof(drm_radeon_cmd_buffer_t)=%zd\n", sizeof(drm_radeon_cmd_buffer_t));
+	radeon_print(RADEON_CS, RADEON_VERBOSE,
+			"Allocating %d bytes command buffer (max state is %d bytes)\n",
+			size * 4, rmesa->hw.max_state_size * 4);
+
+	if (rmesa->radeonScreen->kernel_mm) {
+		int fd = rmesa->radeonScreen->driScreen->fd;
+		rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
+	} else {
+		rmesa->cmdbuf.csm = radeon_cs_manager_legacy_ctor(rmesa);
+	}
+	if (rmesa->cmdbuf.csm == NULL) {
+		/* FIXME: fatal error */
+		return;
+	}
+	rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size);
+	assert(rmesa->cmdbuf.cs != NULL);
+	rmesa->cmdbuf.size = size;
+
+	radeon_cs_space_set_flush(rmesa->cmdbuf.cs,
+				  (void (*)(void *))rmesa->glCtx->Driver.Flush, rmesa->glCtx);
+
+	if (!rmesa->radeonScreen->kernel_mm) {
+		radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]);
+		radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size);
+	} else {
+		struct drm_radeon_gem_info mminfo = { 0 };
+
+		if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo)))
+		{
+			radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_visible);
+			radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, mminfo.gart_size);
+		}
+	}
+
+}
+/**
+ * Destroy the command buffer
+ */
+void rcommonDestroyCmdBuf(radeonContextPtr rmesa)
+{
+	radeon_cs_destroy(rmesa->cmdbuf.cs);
+	if (rmesa->radeonScreen->driScreen->dri2.enabled || rmesa->radeonScreen->kernel_mm) {
+		radeon_cs_manager_gem_dtor(rmesa->cmdbuf.csm);
+	} else {
+		radeon_cs_manager_legacy_dtor(rmesa->cmdbuf.csm);
+	}
+}
+
+void rcommonBeginBatch(radeonContextPtr rmesa, int n,
+		       int dostate,
+		       const char *file,
+		       const char *function,
+		       int line)
+{
+	if (!rmesa->cmdbuf.cs->cdw && dostate) {
+		radeon_print(RADEON_STATE, RADEON_NORMAL,
+				"Reemit state after flush (from %s)\n", function);
+		radeonEmitState(rmesa);
+	}
+	radeon_cs_begin(rmesa->cmdbuf.cs, n, file, function, line);
+
+    radeon_print(RADEON_CS, RADEON_VERBOSE, "BEGIN_BATCH(%d) at %d, from %s:%i\n",
+                        n, rmesa->cmdbuf.cs->cdw, function, line);
+
+}
+
+void radeonUserClear(GLcontext *ctx, GLuint mask)
+{
+   _mesa_meta_clear(ctx, mask);
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.h b/src/mesa/drivers/dri/radeon/radeon_common.h
new file mode 100644
index 00000000000..f3201911ac6
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_common.h
@@ -0,0 +1,87 @@
+#ifndef COMMON_MISC_H
+#define COMMON_MISC_H
+
+#include "radeon_common_context.h"
+#include "radeon_dma.h"
+#include "radeon_texture.h"
+
+void radeonUserClear(GLcontext *ctx, GLuint mask);
+void radeonRecalcScissorRects(radeonContextPtr radeon);
+void radeonSetCliprects(radeonContextPtr radeon);
+void radeonUpdateScissor( GLcontext *ctx );
+void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h);
+void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask );
+
+void radeonWaitForIdleLocked(radeonContextPtr radeon);
+extern uint32_t radeonGetAge(radeonContextPtr radeon);
+void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
+		       const drm_clip_rect_t	  *rect);
+void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
+void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+			 int x, int y, int w, int h );
+
+void radeonUpdatePageFlipping(radeonContextPtr rmesa);
+
+void radeonFlush(GLcontext *ctx);
+void radeonFinish(GLcontext * ctx);
+void radeonEmitState(radeonContextPtr radeon);
+GLuint radeonCountStateEmitSize(radeonContextPtr radeon);
+
+void radeon_clear_tris(GLcontext *ctx, GLbitfield mask);
+
+void radeon_window_moved(radeonContextPtr radeon);
+void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb);
+void radeonDrawBuffer( GLcontext *ctx, GLenum mode );
+void radeonReadBuffer( GLcontext *ctx, GLenum mode );
+void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height);
+void radeon_get_cliprects(radeonContextPtr radeon,
+			  struct drm_clip_rect **cliprects,
+			  unsigned int *num_cliprects,
+			  int *x_off, int *y_off);
+void radeon_fbo_init(struct radeon_context *radeon);
+void
+radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb,
+			   struct radeon_bo *bo);
+struct radeon_renderbuffer *
+radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv);
+static inline struct radeon_renderbuffer *radeon_renderbuffer(struct gl_renderbuffer *rb)
+{
+	struct radeon_renderbuffer *rrb = (struct radeon_renderbuffer *)rb;
+	if (rrb && rrb->base.ClassID == RADEON_RB_CLASS)
+		return rrb;
+	else
+		return NULL;
+}
+
+static inline struct radeon_renderbuffer *radeon_get_renderbuffer(struct gl_framebuffer *fb, int att_index)
+{
+	if (att_index >= 0)
+		return radeon_renderbuffer(fb->Attachment[att_index].Renderbuffer);
+	else
+		return NULL;
+}
+
+static inline struct radeon_renderbuffer *radeon_get_depthbuffer(radeonContextPtr rmesa)
+{
+	struct radeon_renderbuffer *rrb;
+	rrb = radeon_renderbuffer(rmesa->state.depth.rb);
+	if (!rrb)
+		return NULL;
+
+	return rrb;
+}
+
+static inline struct radeon_renderbuffer *radeon_get_colorbuffer(radeonContextPtr rmesa)
+{
+	struct radeon_renderbuffer *rrb;
+
+	rrb = radeon_renderbuffer(rmesa->state.color.rb);
+	if (!rrb)
+		return NULL;
+	return rrb;
+}
+
+#include "radeon_cmdbuf.h"
+
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
new file mode 100644
index 00000000000..71ee06d9a79
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -0,0 +1,805 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+#include "radeon_common.h"
+#include "xmlpool.h"		/* for symbolic values of enum-type options */
+#include "utils.h"
+#include "vblank.h"
+#include "drirenderbuffer.h"
+#include "drivers/common/meta.h"
+#include "main/context.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/state.h"
+#include "main/simple_list.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */
+#include "r600_context.h"
+#endif
+
+#define DRIVER_DATE "20090101"
+
+#ifndef RADEON_DEBUG
+int RADEON_DEBUG = (0);
+#endif
+
+
+static const char* get_chip_family_name(int chip_family)
+{
+	switch(chip_family) {
+	case CHIP_FAMILY_R100: return "R100";
+	case CHIP_FAMILY_RV100: return "RV100";
+	case CHIP_FAMILY_RS100: return "RS100";
+	case CHIP_FAMILY_RV200: return "RV200";
+	case CHIP_FAMILY_RS200: return "RS200";
+	case CHIP_FAMILY_R200: return "R200";
+	case CHIP_FAMILY_RV250: return "RV250";
+	case CHIP_FAMILY_RS300: return "RS300";
+	case CHIP_FAMILY_RV280: return "RV280";
+	case CHIP_FAMILY_R300: return "R300";
+	case CHIP_FAMILY_R350: return "R350";
+	case CHIP_FAMILY_RV350: return "RV350";
+	case CHIP_FAMILY_RV380: return "RV380";
+	case CHIP_FAMILY_R420: return "R420";
+	case CHIP_FAMILY_RV410: return "RV410";
+	case CHIP_FAMILY_RS400: return "RS400";
+	case CHIP_FAMILY_RS600: return "RS600";
+	case CHIP_FAMILY_RS690: return "RS690";
+	case CHIP_FAMILY_RS740: return "RS740";
+	case CHIP_FAMILY_RV515: return "RV515";
+	case CHIP_FAMILY_R520: return "R520";
+	case CHIP_FAMILY_RV530: return "RV530";
+	case CHIP_FAMILY_R580: return "R580";
+	case CHIP_FAMILY_RV560: return "RV560";
+	case CHIP_FAMILY_RV570: return "RV570";
+	case CHIP_FAMILY_R600: return "R600";
+	case CHIP_FAMILY_RV610: return "RV610";
+	case CHIP_FAMILY_RV630: return "RV630";
+	case CHIP_FAMILY_RV670: return "RV670";
+	case CHIP_FAMILY_RV620: return "RV620";
+	case CHIP_FAMILY_RV635: return "RV635";
+	case CHIP_FAMILY_RS780: return "RS780";
+	case CHIP_FAMILY_RS880: return "RS880";
+	case CHIP_FAMILY_RV770: return "RV770";
+	case CHIP_FAMILY_RV730: return "RV730";
+	case CHIP_FAMILY_RV710: return "RV710";
+	case CHIP_FAMILY_RV740: return "RV740";
+	default: return "unknown";
+	}
+}
+
+
+/* Return various strings for glGetString().
+ */
+static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	static char buffer[128];
+
+	switch (name) {
+	case GL_VENDOR:
+		if (IS_R600_CLASS(radeon->radeonScreen))
+			return (GLubyte *) "Advanced Micro Devices, Inc.";
+		else if (IS_R300_CLASS(radeon->radeonScreen))
+			return (GLubyte *) "DRI R300 Project";
+		else
+			return (GLubyte *) "Tungsten Graphics, Inc.";
+
+	case GL_RENDERER:
+	{
+		unsigned offset;
+		GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
+			radeon->radeonScreen->AGPMode;
+		const char* chipclass;
+		char hardwarename[32];
+
+		if (IS_R600_CLASS(radeon->radeonScreen))
+			chipclass = "R600";
+		else if (IS_R300_CLASS(radeon->radeonScreen))
+			chipclass = "R300";
+		else if (IS_R200_CLASS(radeon->radeonScreen))
+			chipclass = "R200";
+		else
+			chipclass = "R100";
+
+		sprintf(hardwarename, "%s (%s %04X)",
+		        chipclass,
+		        get_chip_family_name(radeon->radeonScreen->chip_family),
+		        radeon->radeonScreen->device_id);
+
+		offset = driGetRendererString(buffer, hardwarename, DRIVER_DATE,
+					      agp_mode);
+
+		if (IS_R600_CLASS(radeon->radeonScreen)) {
+			sprintf(&buffer[offset], " TCL");
+		} else if (IS_R300_CLASS(radeon->radeonScreen)) {
+			sprintf(&buffer[offset], " %sTCL",
+				(radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)
+				? "" : "NO-");
+		} else {
+			sprintf(&buffer[offset], " %sTCL",
+				!(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
+				? "" : "NO-");
+		}
+
+		if (radeon->radeonScreen->driScreen->dri2.enabled)
+			strcat(buffer, " DRI2");
+
+		return (GLubyte *) buffer;
+	}
+
+	default:
+		return NULL;
+	}
+}
+
+/* Initialize the driver's misc functions.
+ */
+static void radeonInitDriverFuncs(struct dd_function_table *functions)
+{
+	functions->GetString = radeonGetString;
+}
+
+/**
+ * Create and initialize all common fields of the context,
+ * including the Mesa context itself.
+ */
+GLboolean radeonInitContext(radeonContextPtr radeon,
+			    struct dd_function_table* functions,
+			    const __GLcontextModes * glVisual,
+			    __DRIcontextPrivate * driContextPriv,
+			    void *sharedContextPrivate)
+{
+	__DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+	radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
+	GLcontext* ctx;
+	GLcontext* shareCtx;
+	int fthrottle_mode;
+
+	/* Fill in additional standard functions. */
+	radeonInitDriverFuncs(functions);
+
+	radeon->radeonScreen = screen;
+	/* Allocate and initialize the Mesa context */
+	if (sharedContextPrivate)
+		shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx;
+	else
+		shareCtx = NULL;
+	radeon->glCtx = _mesa_create_context(glVisual, shareCtx,
+					    functions, (void *)radeon);
+	if (!radeon->glCtx)
+		return GL_FALSE;
+
+	ctx = radeon->glCtx;
+	driContextPriv->driverPrivate = radeon;
+
+	meta_init_metaops(ctx, &radeon->meta);
+
+	_mesa_meta_init(ctx);
+
+	/* DRI fields */
+	radeon->dri.context = driContextPriv;
+	radeon->dri.screen = sPriv;
+	radeon->dri.hwContext = driContextPriv->hHWContext;
+	radeon->dri.hwLock = &sPriv->pSAREA->lock;
+	radeon->dri.hwLockCount = 0;
+	radeon->dri.fd = sPriv->fd;
+	radeon->dri.drmMinor = sPriv->drm_version.minor;
+
+	radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
+					       screen->sarea_priv_offset);
+
+	/* Setup IRQs */
+	fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
+	radeon->iw.irq_seq = -1;
+	radeon->irqsEmitted = 0;
+	if (IS_R600_CLASS(radeon->radeonScreen))
+		radeon->do_irqs = 0;
+	else
+		radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
+				   radeon->radeonScreen->irq);
+
+	radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
+
+	if (!radeon->do_irqs)
+		fprintf(stderr,
+			"IRQ's not enabled, falling back to %s: %d %d\n",
+			radeon->do_usleeps ? "usleeps" : "busy waits",
+			fthrottle_mode, radeon->radeonScreen->irq);
+
+        radeon->texture_depth = driQueryOptioni (&radeon->optionCache,
+					        "texture_depth");
+        if (radeon->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
+                radeon->texture_depth = ( glVisual->rgbBits > 16 ) ?
+	        DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
+
+	if (IS_R600_CLASS(radeon->radeonScreen)) {
+		radeon->texture_row_align = 256;
+		radeon->texture_rect_row_align = 256;
+		radeon->texture_compressed_row_align = 256;
+	} else if (IS_R200_CLASS(radeon->radeonScreen) ||
+		   IS_R100_CLASS(radeon->radeonScreen)) {
+		radeon->texture_row_align = 32;
+		radeon->texture_rect_row_align = 64;
+		radeon->texture_compressed_row_align = 32;
+	} else { /* R300 - not sure this is all correct */
+		int chip_family = radeon->radeonScreen->chip_family;
+		if (chip_family == CHIP_FAMILY_RS600 ||
+		    chip_family == CHIP_FAMILY_RS690 ||
+		    chip_family == CHIP_FAMILY_RS740)
+			radeon->texture_row_align = 64;
+		else
+			radeon->texture_row_align = 32;
+		radeon->texture_rect_row_align = 64;
+		radeon->texture_compressed_row_align = 64;
+	}
+
+	make_empty_list(&radeon->query.not_flushed_head);
+	radeon_init_dma(radeon);
+
+	return GL_TRUE;
+}
+
+
+
+/**
+ * Destroy the command buffer and state atoms.
+ */
+static void radeon_destroy_atom_list(radeonContextPtr radeon)
+{
+	struct radeon_state_atom *atom;
+
+	foreach(atom, &radeon->hw.atomlist) {
+		FREE(atom->cmd);
+		if (atom->lastcmd)
+			FREE(atom->lastcmd);
+	}
+
+}
+
+/**
+ * Cleanup common context fields.
+ * Called by r200DestroyContext/r300DestroyContext
+ */
+void radeonDestroyContext(__DRIcontextPrivate *driContextPriv )
+{
+#ifdef RADEON_BO_TRACK
+	FILE *track;
+#endif
+	GET_CURRENT_CONTEXT(ctx);
+	radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
+	radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
+
+	assert(radeon);
+
+	_mesa_meta_free(radeon->glCtx);
+
+	if (radeon == current) {
+		radeon_firevertices(radeon);
+		_mesa_make_current(NULL, NULL, NULL);
+	}
+
+	if (!is_empty_list(&radeon->dma.reserved)) {
+		rcommonFlushCmdBuf( radeon, __FUNCTION__ );
+	}
+
+	radeonFreeDmaRegions(radeon);
+	radeonReleaseArrays(radeon->glCtx, ~0);
+	meta_destroy_metaops(&radeon->meta);
+	if (radeon->vtbl.free_context)
+		radeon->vtbl.free_context(radeon->glCtx);
+	_swsetup_DestroyContext( radeon->glCtx );
+	_tnl_DestroyContext( radeon->glCtx );
+	_vbo_DestroyContext( radeon->glCtx );
+	_swrast_DestroyContext( radeon->glCtx );
+
+	/* free atom list */
+	/* free the Mesa context */
+	_mesa_destroy_context(radeon->glCtx);
+
+	/* _mesa_destroy_context() might result in calls to functions that
+	 * depend on the DriverCtx, so don't set it to NULL before.
+	 *
+	 * radeon->glCtx->DriverCtx = NULL;
+	 */
+	/* free the option cache */
+	driDestroyOptionCache(&radeon->optionCache);
+
+	rcommonDestroyCmdBuf(radeon);
+
+	radeon_destroy_atom_list(radeon);
+
+	if (radeon->state.scissor.pClipRects) {
+		FREE(radeon->state.scissor.pClipRects);
+		radeon->state.scissor.pClipRects = 0;
+	}
+#ifdef RADEON_BO_TRACK
+	track = fopen("/tmp/tracklog", "w");
+	if (track) {
+		radeon_tracker_print(&radeon->radeonScreen->bom->tracker, track);
+		fclose(track);
+	}
+#endif
+	FREE(radeon);
+}
+
+/* Force the context `c' to be unbound from its buffer.
+ */
+GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv)
+{
+	radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
+
+	if (RADEON_DEBUG & RADEON_DRI)
+		fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
+			radeon->glCtx);
+
+	return GL_TRUE;
+}
+
+
+static void
+radeon_make_kernel_renderbuffer_current(radeonContextPtr radeon,
+					struct radeon_framebuffer *draw)
+{
+	/* if radeon->fake */
+	struct radeon_renderbuffer *rb;
+
+	if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
+		if (!rb->bo) {
+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+						radeon->radeonScreen->frontOffset,
+						0,
+						0,
+						RADEON_GEM_DOMAIN_VRAM,
+						0);
+		}
+		rb->cpp = radeon->radeonScreen->cpp;
+		rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp;
+	}
+	if ((rb = (void *)draw->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
+		if (!rb->bo) {
+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+						radeon->radeonScreen->backOffset,
+						0,
+						0,
+						RADEON_GEM_DOMAIN_VRAM,
+						0);
+		}
+		rb->cpp = radeon->radeonScreen->cpp;
+		rb->pitch = radeon->radeonScreen->backPitch * rb->cpp;
+	}
+	if ((rb = (void *)draw->base.Attachment[BUFFER_DEPTH].Renderbuffer)) {
+		if (!rb->bo) {
+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+						radeon->radeonScreen->depthOffset,
+						0,
+						0,
+						RADEON_GEM_DOMAIN_VRAM,
+						0);
+		}
+		rb->cpp = radeon->radeonScreen->cpp;
+		rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
+	}
+	if ((rb = (void *)draw->base.Attachment[BUFFER_STENCIL].Renderbuffer)) {
+		if (!rb->bo) {
+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+						radeon->radeonScreen->depthOffset,
+						0,
+						0,
+						RADEON_GEM_DOMAIN_VRAM,
+						0);
+		}
+		rb->cpp = radeon->radeonScreen->cpp;
+		rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
+	}
+}
+
+static void
+radeon_make_renderbuffer_current(radeonContextPtr radeon,
+				 struct radeon_framebuffer *draw)
+{
+	int size = 4096*4096*4;
+	/* if radeon->fake */
+	struct radeon_renderbuffer *rb;
+
+	if (radeon->radeonScreen->kernel_mm) {
+		radeon_make_kernel_renderbuffer_current(radeon, draw);
+		return;
+	}
+
+
+	if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
+		if (!rb->bo) {
+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+						radeon->radeonScreen->frontOffset +
+						radeon->radeonScreen->fbLocation,
+						size,
+						4096,
+						RADEON_GEM_DOMAIN_VRAM,
+						0);
+		}
+		rb->cpp = radeon->radeonScreen->cpp;
+		rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp;
+	}
+	if ((rb = (void *)draw->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
+		if (!rb->bo) {
+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+						radeon->radeonScreen->backOffset +
+						radeon->radeonScreen->fbLocation,
+						size,
+						4096,
+						RADEON_GEM_DOMAIN_VRAM,
+						0);
+		}
+		rb->cpp = radeon->radeonScreen->cpp;
+		rb->pitch = radeon->radeonScreen->backPitch * rb->cpp;
+	}
+	if ((rb = (void *)draw->base.Attachment[BUFFER_DEPTH].Renderbuffer)) {
+		if (!rb->bo) {
+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+						radeon->radeonScreen->depthOffset +
+						radeon->radeonScreen->fbLocation,
+						size,
+						4096,
+						RADEON_GEM_DOMAIN_VRAM,
+						0);
+		}
+		rb->cpp = radeon->radeonScreen->cpp;
+		rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
+	}
+	if ((rb = (void *)draw->base.Attachment[BUFFER_STENCIL].Renderbuffer)) {
+		if (!rb->bo) {
+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+						radeon->radeonScreen->depthOffset +
+						radeon->radeonScreen->fbLocation,
+						size,
+						4096,
+						RADEON_GEM_DOMAIN_VRAM,
+						0);
+		}
+		rb->cpp = radeon->radeonScreen->cpp;
+		rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
+	}
+}
+
+static unsigned
+radeon_bits_per_pixel(const struct radeon_renderbuffer *rb)
+{
+   switch (rb->base._ActualFormat) {
+   case GL_RGB5:
+   case GL_DEPTH_COMPONENT16:
+      return 16;
+   case GL_RGB8:
+   case GL_RGBA8:
+   case GL_DEPTH_COMPONENT24:
+   case GL_DEPTH24_STENCIL8_EXT:
+   case GL_STENCIL_INDEX8_EXT:
+      return 32;
+   default:
+      return 0;
+   }
+}
+
+void
+radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
+{
+	unsigned int attachments[10];
+	__DRIbuffer *buffers = NULL;
+	__DRIscreen *screen;
+	struct radeon_renderbuffer *rb;
+	int i, count;
+	struct radeon_framebuffer *draw;
+	radeonContextPtr radeon;
+	char *regname;
+	struct radeon_bo *depth_bo = NULL, *bo;
+
+	if (RADEON_DEBUG & RADEON_DRI)
+	    fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
+
+	draw = drawable->driverPrivate;
+	screen = context->driScreenPriv;
+	radeon = (radeonContextPtr) context->driverPrivate;
+
+	if (screen->dri2.loader
+	   && (screen->dri2.loader->base.version > 2)
+	   && (screen->dri2.loader->getBuffersWithFormat != NULL)) {
+		struct radeon_renderbuffer *depth_rb;
+		struct radeon_renderbuffer *stencil_rb;
+
+		i = 0;
+		if ((radeon->is_front_buffer_rendering ||
+		     radeon->is_front_buffer_reading ||
+		     !draw->color_rb[1])
+		    && draw->color_rb[0]) {
+			attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
+			attachments[i++] = radeon_bits_per_pixel(draw->color_rb[0]);
+		}
+
+		if (draw->color_rb[1]) {
+			attachments[i++] = __DRI_BUFFER_BACK_LEFT;
+			attachments[i++] = radeon_bits_per_pixel(draw->color_rb[1]);
+		}
+
+		depth_rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH);
+		stencil_rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL);
+
+		if ((depth_rb != NULL) && (stencil_rb != NULL)) {
+			attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL;
+			attachments[i++] = radeon_bits_per_pixel(depth_rb);
+		} else if (depth_rb != NULL) {
+			attachments[i++] = __DRI_BUFFER_DEPTH;
+			attachments[i++] = radeon_bits_per_pixel(depth_rb);
+		} else if (stencil_rb != NULL) {
+			attachments[i++] = __DRI_BUFFER_STENCIL;
+			attachments[i++] = radeon_bits_per_pixel(stencil_rb);
+		}
+
+		buffers = (*screen->dri2.loader->getBuffersWithFormat)(drawable,
+								&drawable->w,
+								&drawable->h,
+								attachments, i / 2,
+								&count,
+								drawable->loaderPrivate);
+	} else if (screen->dri2.loader) {
+		i = 0;
+		if (draw->color_rb[0])
+			attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
+		if (draw->color_rb[1])
+			attachments[i++] = __DRI_BUFFER_BACK_LEFT;
+		if (radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH))
+			attachments[i++] = __DRI_BUFFER_DEPTH;
+		if (radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL))
+			attachments[i++] = __DRI_BUFFER_STENCIL;
+
+		buffers = (*screen->dri2.loader->getBuffers)(drawable,
+								 &drawable->w,
+								 &drawable->h,
+								 attachments, i,
+								 &count,
+								 drawable->loaderPrivate);
+	}
+
+	if (buffers == NULL)
+		return;
+
+	/* set one cliprect to cover the whole drawable */
+	drawable->x = 0;
+	drawable->y = 0;
+	drawable->backX = 0;
+	drawable->backY = 0;
+	drawable->numClipRects = 1;
+	drawable->pClipRects[0].x1 = 0;
+	drawable->pClipRects[0].y1 = 0;
+	drawable->pClipRects[0].x2 = drawable->w;
+	drawable->pClipRects[0].y2 = drawable->h;
+	drawable->numBackClipRects = 1;
+	drawable->pBackClipRects[0].x1 = 0;
+	drawable->pBackClipRects[0].y1 = 0;
+	drawable->pBackClipRects[0].x2 = drawable->w;
+	drawable->pBackClipRects[0].y2 = drawable->h;
+	for (i = 0; i < count; i++) {
+		switch (buffers[i].attachment) {
+		case __DRI_BUFFER_FRONT_LEFT:
+			rb = draw->color_rb[0];
+			regname = "dri2 front buffer";
+			break;
+		case __DRI_BUFFER_FAKE_FRONT_LEFT:
+			rb = draw->color_rb[0];
+			regname = "dri2 fake front buffer";
+			break;
+		case __DRI_BUFFER_BACK_LEFT:
+			rb = draw->color_rb[1];
+			regname = "dri2 back buffer";
+			break;
+		case __DRI_BUFFER_DEPTH:
+			rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH);
+			regname = "dri2 depth buffer";
+			break;
+		case __DRI_BUFFER_DEPTH_STENCIL:
+			rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH);
+			regname = "dri2 depth / stencil buffer";
+			break;
+		case __DRI_BUFFER_STENCIL:
+			rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL);
+			regname = "dri2 stencil buffer";
+			break;
+		case __DRI_BUFFER_ACCUM:
+		default:
+			fprintf(stderr,
+				"unhandled buffer attach event, attacment type %d\n",
+				buffers[i].attachment);
+			return;
+		}
+
+		if (rb == NULL)
+			continue;
+
+		if (rb->bo) {
+			uint32_t name = radeon_gem_name_bo(rb->bo);
+			if (name == buffers[i].name)
+				continue;
+		}
+
+		if (RADEON_DEBUG & RADEON_DRI)
+			fprintf(stderr,
+				"attaching buffer %s, %d, at %d, cpp %d, pitch %d\n",
+				regname, buffers[i].name, buffers[i].attachment,
+				buffers[i].cpp, buffers[i].pitch);
+
+		rb->cpp = buffers[i].cpp;
+		rb->pitch = buffers[i].pitch;
+		rb->base.Width = drawable->w;
+		rb->base.Height = drawable->h;
+		rb->has_surface = 0;
+
+		if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_bo) {
+			if (RADEON_DEBUG & RADEON_DRI)
+				fprintf(stderr, "(reusing depth buffer as stencil)\n");
+			bo = depth_bo;
+			radeon_bo_ref(bo);
+		} else {
+			uint32_t tiling_flags = 0, pitch = 0;
+			int ret;
+
+			bo = radeon_bo_open(radeon->radeonScreen->bom,
+						buffers[i].name,
+						0,
+						0,
+						RADEON_GEM_DOMAIN_VRAM,
+						buffers[i].flags);
+
+			if (bo == NULL) {
+
+				fprintf(stderr, "failed to attach %s %d\n",
+					regname, buffers[i].name);
+
+			}
+
+			ret = radeon_bo_get_tiling(bo, &tiling_flags, &pitch);
+			if (tiling_flags & RADEON_TILING_MACRO)
+				bo->flags |= RADEON_BO_FLAGS_MACRO_TILE;
+			if (tiling_flags & RADEON_TILING_MICRO)
+				bo->flags |= RADEON_BO_FLAGS_MICRO_TILE;
+			
+		}
+
+		if (buffers[i].attachment == __DRI_BUFFER_DEPTH) {
+			if (draw->base.Visual.depthBits == 16)
+				rb->cpp = 2;
+			depth_bo = bo;
+		}
+
+		radeon_renderbuffer_set_bo(rb, bo);
+		radeon_bo_unref(bo);
+
+		if (buffers[i].attachment == __DRI_BUFFER_DEPTH_STENCIL) {
+			rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL);
+			if (rb != NULL) {
+				struct radeon_bo *stencil_bo = NULL;
+
+				if (rb->bo) {
+					uint32_t name = radeon_gem_name_bo(rb->bo);
+					if (name == buffers[i].name)
+						continue;
+				}
+
+				stencil_bo = bo;
+				radeon_bo_ref(stencil_bo);
+				radeon_renderbuffer_set_bo(rb, stencil_bo);
+				radeon_bo_unref(stencil_bo);
+			}
+		}
+	}
+
+	driUpdateFramebufferSize(radeon->glCtx, drawable);
+}
+
+/* Force the context `c' to be the current context and associate with it
+ * buffer `b'.
+ */
+GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
+			    __DRIdrawablePrivate * driDrawPriv,
+			    __DRIdrawablePrivate * driReadPriv)
+{
+	radeonContextPtr radeon;
+	struct radeon_framebuffer *drfb;
+	struct gl_framebuffer *readfb;
+
+	if (!driContextPriv) {
+		if (RADEON_DEBUG & RADEON_DRI)
+			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
+		_mesa_make_current(NULL, NULL, NULL);
+		return GL_TRUE;
+	}
+
+	radeon = (radeonContextPtr) driContextPriv->driverPrivate;
+	drfb = driDrawPriv->driverPrivate;
+	readfb = driReadPriv->driverPrivate;
+
+	if (driContextPriv->driScreenPriv->dri2.enabled) {
+		radeon_update_renderbuffers(driContextPriv, driDrawPriv);
+		if (driDrawPriv != driReadPriv)
+			radeon_update_renderbuffers(driContextPriv, driReadPriv);
+		_mesa_reference_renderbuffer(&radeon->state.color.rb,
+			&(radeon_get_renderbuffer(&drfb->base, BUFFER_BACK_LEFT)->base));
+		_mesa_reference_renderbuffer(&radeon->state.depth.rb,
+			&(radeon_get_renderbuffer(&drfb->base, BUFFER_DEPTH)->base));
+	} else {
+		radeon_make_renderbuffer_current(radeon, drfb);
+	}
+
+	if (RADEON_DEBUG & RADEON_DRI)
+	     fprintf(stderr, "%s ctx %p dfb %p rfb %p\n", __FUNCTION__, radeon->glCtx, drfb, readfb);
+
+	driUpdateFramebufferSize(radeon->glCtx, driDrawPriv);
+	if (driReadPriv != driDrawPriv)
+		driUpdateFramebufferSize(radeon->glCtx, driReadPriv);
+
+	_mesa_make_current(radeon->glCtx, &drfb->base, readfb);
+
+	_mesa_update_state(radeon->glCtx);
+
+	if (radeon->glCtx->DrawBuffer == &drfb->base) {
+		if (driDrawPriv->swap_interval == (unsigned)-1) {
+			int i;
+			driDrawPriv->vblFlags =
+				(radeon->radeonScreen->irq != 0)
+				? driGetDefaultVBlankFlags(&radeon->
+							   optionCache)
+				: VBLANK_FLAG_NO_IRQ;
+
+			driDrawableInitVBlank(driDrawPriv);
+			drfb->vbl_waited = driDrawPriv->vblSeq;
+
+			for (i = 0; i < 2; i++) {
+				if (drfb->color_rb[i])
+					drfb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq;
+			}
+
+		}
+
+		radeon_window_moved(radeon);
+		radeon_draw_buffer(radeon->glCtx, &drfb->base);
+	}
+
+
+	if (RADEON_DEBUG & RADEON_DRI)
+		fprintf(stderr, "End %s\n", __FUNCTION__);
+
+	return GL_TRUE;
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
new file mode 100644
index 00000000000..0309345393d
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -0,0 +1,594 @@
+
+#ifndef COMMON_CONTEXT_H
+#define COMMON_CONTEXT_H
+
+#include "main/mm.h"
+#include "math/m_vector.h"
+#include "texmem.h"
+#include "tnl/t_context.h"
+#include "main/colormac.h"
+
+#include "radeon_debug.h"
+#include "radeon_screen.h"
+#include "radeon_drm.h"
+#include "dri_util.h"
+#include "tnl/t_vertex.h"
+
+#include "dri_metaops.h"
+struct radeon_context;
+
+#include "radeon_bocs_wrapper.h"
+
+/* This union is used to avoid warnings/miscompilation
+   with float to uint32_t casts due to strict-aliasing */
+typedef union { GLfloat f; uint32_t ui32; } float_ui32_type;
+
+struct radeon_context;
+typedef struct radeon_context radeonContextRec;
+typedef struct radeon_context *radeonContextPtr;
+
+
+#define TEX_0   0x1
+#define TEX_1   0x2
+#define TEX_2   0x4
+#define TEX_3	0x8
+#define TEX_4	0x10
+#define TEX_5	0x20
+
+/* Rasterizing fallbacks */
+/* See correponding strings in r200_swtcl.c */
+#define RADEON_FALLBACK_TEXTURE		0x0001
+#define RADEON_FALLBACK_DRAW_BUFFER	0x0002
+#define RADEON_FALLBACK_STENCIL		0x0004
+#define RADEON_FALLBACK_RENDER_MODE	0x0008
+#define RADEON_FALLBACK_BLEND_EQ	0x0010
+#define RADEON_FALLBACK_BLEND_FUNC	0x0020
+#define RADEON_FALLBACK_DISABLE 	0x0040
+#define RADEON_FALLBACK_BORDER_MODE	0x0080
+#define RADEON_FALLBACK_DEPTH_BUFFER	0x0100
+#define RADEON_FALLBACK_STENCIL_BUFFER  0x0200
+
+#define R200_FALLBACK_TEXTURE           0x01
+#define R200_FALLBACK_DRAW_BUFFER       0x02
+#define R200_FALLBACK_STENCIL           0x04
+#define R200_FALLBACK_RENDER_MODE       0x08
+#define R200_FALLBACK_DISABLE           0x10
+#define R200_FALLBACK_BORDER_MODE       0x20
+
+#define RADEON_TCL_FALLBACK_RASTER            0x1 /* rasterization */
+#define RADEON_TCL_FALLBACK_UNFILLED          0x2 /* unfilled tris */
+#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE     0x4 /* twoside tris */
+#define RADEON_TCL_FALLBACK_MATERIAL          0x8 /* material in vb */
+#define RADEON_TCL_FALLBACK_TEXGEN_0          0x10 /* texgen, unit 0 */
+#define RADEON_TCL_FALLBACK_TEXGEN_1          0x20 /* texgen, unit 1 */
+#define RADEON_TCL_FALLBACK_TEXGEN_2          0x40 /* texgen, unit 2 */
+#define RADEON_TCL_FALLBACK_TCL_DISABLE       0x80 /* user disable */
+#define RADEON_TCL_FALLBACK_FOGCOORDSPEC      0x100 /* fogcoord, sep. spec light */
+
+/* The blit width for texture uploads
+ */
+#define BLIT_WIDTH_BYTES 1024
+
+/* Use the templated vertex format:
+ */
+#define COLOR_IS_RGBA
+#define TAG(x) radeon##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+#define RADEON_RB_CLASS 0xdeadbeef
+
+struct radeon_renderbuffer
+{
+	struct gl_renderbuffer base;
+	struct radeon_bo *bo;
+	unsigned int cpp;
+	/* unsigned int offset; */
+	unsigned int pitch;
+
+	uint32_t draw_offset; /* FBO */
+	/* boo Xorg 6.8.2 compat */
+	int has_surface;
+
+	GLuint pf_pending;  /**< sequence number of pending flip */
+	GLuint vbl_pending;   /**< vblank sequence number of pending flip */
+	__DRIdrawablePrivate *dPriv;
+};
+
+struct radeon_framebuffer
+{
+	struct gl_framebuffer base;
+
+	struct radeon_renderbuffer *color_rb[2];
+
+	GLuint vbl_waited;
+
+	/* buffer swap */
+	int64_t swap_ust;
+	int64_t swap_missed_ust;
+
+	GLuint swap_count;
+	GLuint swap_missed_count;
+
+	/* Drawable page flipping state */
+	GLboolean pf_active;
+	GLint pf_current_page;
+	GLint pf_num_pages;
+
+};
+
+
+struct radeon_colorbuffer_state {
+	GLuint clear;
+	int roundEnable;
+	struct gl_renderbuffer *rb;
+	uint32_t draw_offset; /* offset into color renderbuffer - FBOs */
+};
+
+struct radeon_depthbuffer_state {
+	GLuint clear;
+	struct gl_renderbuffer *rb;
+};
+
+struct radeon_scissor_state {
+	drm_clip_rect_t rect;
+	GLboolean enabled;
+
+	GLuint numClipRects;	/* Cliprects active */
+	GLuint numAllocedClipRects;	/* Cliprects available */
+	drm_clip_rect_t *pClipRects;
+};
+
+struct radeon_stencilbuffer_state {
+	GLuint clear;		/* rb3d_stencilrefmask value */
+};
+
+struct radeon_state_atom {
+	struct radeon_state_atom *next, *prev;
+	const char *name;	/* for debug */
+	int cmd_size;		/* size in bytes */
+        GLuint idx;
+	GLuint is_tcl;
+        GLuint *cmd;		/* one or more cmd's */
+	GLuint *lastcmd;		/* one or more cmd's */
+	GLboolean dirty;	/* dirty-mark in emit_state_list */
+        int (*check) (GLcontext *, struct radeon_state_atom *atom); /* is this state active? */
+        void (*emit) (GLcontext *, struct radeon_state_atom *atom);
+};
+
+struct radeon_hw_state {
+  	/* Head of the linked list of state atoms. */
+	struct radeon_state_atom atomlist;
+	int max_state_size;	/* Number of bytes necessary for a full state emit. */
+	int max_post_flush_size; /* Number of bytes necessary for post flushing emits */
+	GLboolean is_dirty, all_dirty;
+};
+
+
+/* Texture related */
+typedef struct _radeon_texture_image radeon_texture_image;
+
+struct _radeon_texture_image {
+	struct gl_texture_image base;
+
+	/**
+	 * If mt != 0, the image is stored in hardware format in the
+	 * given mipmap tree. In this case, base.Data may point into the
+	 * mapping of the buffer object that contains the mipmap tree.
+	 *
+	 * If mt == 0, the image is stored in normal memory pointed to
+	 * by base.Data.
+	 */
+	struct _radeon_mipmap_tree *mt;
+	struct radeon_bo *bo;
+
+	int mtlevel; /** if mt != 0, this is the image's level in the mipmap tree */
+	int mtface; /** if mt != 0, this is the image's face in the mipmap tree */
+};
+
+
+static INLINE radeon_texture_image *get_radeon_texture_image(struct gl_texture_image *image)
+{
+	return (radeon_texture_image*)image;
+}
+
+
+typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
+
+#define RADEON_TXO_MICRO_TILE               (1 << 3)
+
+/* Texture object in locally shared texture space.
+ */
+struct radeon_tex_obj {
+	struct gl_texture_object base;
+	struct _radeon_mipmap_tree *mt;
+
+	/**
+	 * This is true if we've verified that the mipmap tree above is complete
+	 * and so on.
+	 */
+	GLboolean validated;
+
+	GLuint override_offset;
+	GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
+	GLuint tile_bits;	/* hw texture tile bits used on this texture */
+        struct radeon_bo *bo;
+
+	GLuint pp_txfilter;	/* hardware register values */
+	GLuint pp_txformat;
+	GLuint pp_txformat_x;
+	GLuint pp_txsize;	/* npot only */
+	GLuint pp_txpitch;	/* npot only */
+	GLuint pp_border_color;
+	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
+
+        GLuint pp_txfilter_1;	/*  r300 */
+
+	/* r700 texture states */
+	GLuint SQ_TEX_RESOURCE0;
+	GLuint SQ_TEX_RESOURCE1;
+	GLuint SQ_TEX_RESOURCE2;
+	GLuint SQ_TEX_RESOURCE3;
+	GLuint SQ_TEX_RESOURCE4;
+	GLuint SQ_TEX_RESOURCE5;
+	GLuint SQ_TEX_RESOURCE6;
+
+	GLuint SQ_TEX_SAMPLER0;
+	GLuint SQ_TEX_SAMPLER1;
+	GLuint SQ_TEX_SAMPLER2;
+
+	GLuint TD_PS_SAMPLER0_BORDER_RED;
+	GLuint TD_PS_SAMPLER0_BORDER_GREEN;
+	GLuint TD_PS_SAMPLER0_BORDER_BLUE;
+	GLuint TD_PS_SAMPLER0_BORDER_ALPHA;
+
+	GLboolean border_fallback;
+
+
+};
+
+static INLINE radeonTexObj* radeon_tex_obj(struct gl_texture_object *texObj)
+{
+	return (radeonTexObj*)texObj;
+}
+
+/* occlusion query */
+struct radeon_query_object {
+	struct gl_query_object Base;
+	struct radeon_bo *bo;
+	int curr_offset;
+	GLboolean emitted_begin;
+
+	/* Double linked list of not flushed query objects */
+	struct radeon_query_object *prev, *next;
+};
+
+/* Need refcounting on dma buffers:
+ */
+struct radeon_dma_buffer {
+	int refcount;		/* the number of retained regions in buf */
+	drmBufPtr buf;
+};
+
+struct radeon_aos {
+	struct radeon_bo *bo; /** Buffer object where vertex data is stored */
+	int offset; /** Offset into buffer object, in bytes */
+	int components; /** Number of components per vertex */
+	int stride; /** Stride in dwords (may be 0 for repeating) */
+	int count; /** Number of vertices */
+};
+
+#define DMA_BO_FREE_TIME 100
+
+struct radeon_dma_bo {
+  struct radeon_dma_bo *next, *prev;
+  struct radeon_bo *bo;
+  int expire_counter;
+};
+
+struct radeon_dma {
+        /* Active dma region.  Allocations for vertices and retained
+         * regions come from here.  Also used for emitting random vertices,
+         * these may be flushed by calling flush_current();
+         */
+	struct radeon_dma_bo free;
+	struct radeon_dma_bo wait;
+	struct radeon_dma_bo reserved;
+        size_t current_used; /** Number of bytes allocated and forgotten about */
+        size_t current_vertexptr; /** End of active vertex region */
+        size_t minimum_size;
+
+        /**
+         * If current_vertexptr != current_used then flush must be non-zero.
+         * flush must be called before non-active vertex allocations can be
+         * performed.
+         */
+        void (*flush) (GLcontext *);
+};
+
+/* radeon_swtcl.c
+ */
+struct radeon_swtcl_info {
+
+	GLuint RenderIndex;
+	GLuint vertex_size;
+	GLubyte *verts;
+
+	/* Fallback rasterization functions
+	 */
+	GLuint hw_primitive;
+	GLenum render_primitive;
+	GLuint numverts;
+
+	struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+	GLuint vertex_attr_count;
+
+	GLuint emit_prediction;
+};
+
+#define RADEON_MAX_AOS_ARRAYS		16
+struct radeon_tcl_info {
+	struct radeon_aos aos[RADEON_MAX_AOS_ARRAYS];
+	GLuint aos_count;
+	struct radeon_bo *elt_dma_bo; /** Buffer object that contains element indices */
+	int elt_dma_offset; /** Offset into this buffer object, in bytes */
+};
+
+struct radeon_ioctl {
+	GLuint vertex_offset;
+	GLuint vertex_max;
+	struct radeon_bo *bo;
+	GLuint vertex_size;
+};
+
+#define RADEON_MAX_PRIMS 64
+
+struct radeon_prim {
+	GLuint start;
+	GLuint end;
+	GLuint prim;
+};
+
+static INLINE GLuint radeonPackColor(GLuint cpp,
+                                     GLubyte r, GLubyte g,
+                                     GLubyte b, GLubyte a)
+{
+	switch (cpp) {
+	case 2:
+		return PACK_COLOR_565(r, g, b);
+	case 4:
+		return PACK_COLOR_8888(a, r, g, b);
+	default:
+		return 0;
+	}
+}
+
+#define MAX_CMD_BUF_SZ (16*1024)
+
+#define MAX_DMA_BUF_SZ (64*1024)
+
+struct radeon_store {
+	GLuint statenr;
+	GLuint primnr;
+	char cmd_buf[MAX_CMD_BUF_SZ];
+	int cmd_used;
+	int elts_start;
+};
+
+struct radeon_dri_mirror {
+	__DRIcontextPrivate *context;	/* DRI context */
+	__DRIscreenPrivate *screen;	/* DRI screen */
+
+	drm_context_t hwContext;
+	drm_hw_lock_t *hwLock;
+	int hwLockCount;
+	int fd;
+	int drmMinor;
+};
+
+typedef void (*radeon_tri_func) (radeonContextPtr,
+				 radeonVertex *,
+				 radeonVertex *, radeonVertex *);
+
+typedef void (*radeon_line_func) (radeonContextPtr,
+				  radeonVertex *, radeonVertex *);
+
+typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *);
+
+#define RADEON_MAX_BOS 32
+struct radeon_state {
+	struct radeon_colorbuffer_state color;
+	struct radeon_depthbuffer_state depth;
+	struct radeon_scissor_state scissor;
+	struct radeon_stencilbuffer_state stencil;
+
+	struct radeon_cs_space_check bos[RADEON_MAX_BOS];
+	int validated_bo_count;
+};
+
+/**
+ * This structure holds the command buffer while it is being constructed.
+ *
+ * The first batch of commands in the buffer is always the state that needs
+ * to be re-emitted when the context is lost. This batch can be skipped
+ * otherwise.
+ */
+struct radeon_cmdbuf {
+	struct radeon_cs_manager    *csm;
+	struct radeon_cs            *cs;
+	int size; /** # of dwords total */
+	unsigned int flushing:1; /** whether we're currently in FlushCmdBufLocked */
+};
+
+struct radeon_context {
+   GLcontext *glCtx;
+   radeonScreenPtr radeonScreen;	/* Screen private DRI data */
+
+   /* Texture object bookkeeping
+    */
+   int                   texture_depth;
+   float                 initialMaxAnisotropy;
+   uint32_t              texture_row_align;
+   uint32_t              texture_rect_row_align;
+   uint32_t              texture_compressed_row_align;
+
+  struct radeon_dma dma;
+  struct radeon_hw_state hw;
+   /* Rasterization and vertex state:
+    */
+   GLuint TclFallback;
+   GLuint Fallback;
+   GLuint NewGLState;
+   DECLARE_RENDERINPUTS(tnl_index_bitset);	/* index of bits for last tnl_install_attrs */
+
+   /* Drawable, cliprect and scissor information */
+   GLuint numClipRects;	/* Cliprects for the draw buffer */
+   drm_clip_rect_t *pClipRects;
+   unsigned int lastStamp;
+   drm_radeon_sarea_t *sarea;	/* Private SAREA data */
+
+   /* Mirrors of some DRI state */
+   struct radeon_dri_mirror dri;
+
+   /* Busy waiting */
+   GLuint do_usleeps;
+   GLuint do_irqs;
+   GLuint irqsEmitted;
+   drm_radeon_irq_wait_t iw;
+
+   /* Derived state - for r300 only */
+   struct radeon_state state;
+
+   struct radeon_swtcl_info swtcl;
+   struct radeon_tcl_info tcl;
+   /* Configuration cache
+    */
+   driOptionCache optionCache;
+
+   struct radeon_cmdbuf cmdbuf;
+
+   struct radeon_debug debug;
+
+  drm_clip_rect_t fboRect;
+  GLboolean constant_cliprect; /* use for FBO or DRI2 rendering */
+  GLboolean front_cliprects;
+
+   /**
+    * Set if rendering has occured to the drawable's front buffer.
+    *
+    * This is used in the DRI2 case to detect that glFlush should also copy
+    * the contents of the fake front buffer to the real front buffer.
+    */
+   GLboolean front_buffer_dirty;
+
+   /**
+    * Track whether front-buffer rendering is currently enabled
+    *
+    * A separate flag is used to track this in order to support MRT more
+    * easily.
+    */
+   GLboolean is_front_buffer_rendering;
+
+   /**
+    * Track whether front-buffer is the current read target.
+    *
+    * This is closely associated with is_front_buffer_rendering, but may
+    * be set separately.  The DRI2 fake front buffer must be referenced
+    * either way.
+    */
+   GLboolean is_front_buffer_reading;
+
+   struct dri_metaops meta;
+
+   struct {
+	struct radeon_query_object *current;
+	struct radeon_query_object not_flushed_head;
+	struct radeon_state_atom queryobj;
+   } query;
+
+   struct {
+	   void (*get_lock)(radeonContextPtr radeon);
+	   void (*update_viewport_offset)(GLcontext *ctx);
+	   void (*emit_cs_header)(struct radeon_cs *cs, radeonContextPtr rmesa);
+	   void (*swtcl_flush)(GLcontext *ctx, uint32_t offset);
+	   void (*pre_emit_atoms)(radeonContextPtr rmesa);
+	   void (*pre_emit_state)(radeonContextPtr rmesa);
+	   void (*fallback)(GLcontext *ctx, GLuint bit, GLboolean mode);
+	   void (*free_context)(GLcontext *ctx);
+	   void (*emit_query_finish)(radeonContextPtr radeon);
+	   void (*update_scissor)(GLcontext *ctx);
+   } vtbl;
+};
+
+#define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx))
+
+static inline __DRIdrawablePrivate* radeon_get_drawable(radeonContextPtr radeon)
+{
+	return radeon->dri.context->driDrawablePriv;
+}
+
+static inline __DRIdrawablePrivate* radeon_get_readable(radeonContextPtr radeon)
+{
+	return radeon->dri.context->driReadablePriv;
+}
+
+/**
+ * This function takes a float and packs it into a uint32_t
+ */
+static INLINE uint32_t radeonPackFloat32(float fl)
+{
+	union {
+		float fl;
+		uint32_t u;
+	} u;
+
+	u.fl = fl;
+	return u.u;
+}
+
+/* This is probably wrong for some values, I need to test this
+ * some more.  Range checking would be a good idea also..
+ *
+ * But it works for most things.  I'll fix it later if someone
+ * else with a better clue doesn't
+ */
+static INLINE uint32_t radeonPackFloat24(float f)
+{
+	float mantissa;
+	int exponent;
+	uint32_t float24 = 0;
+
+	if (f == 0.0)
+		return 0;
+
+	mantissa = frexpf(f, &exponent);
+
+	/* Handle -ve */
+	if (mantissa < 0) {
+		float24 |= (1 << 23);
+		mantissa = mantissa * -1.0;
+	}
+	/* Handle exponent, bias of 63 */
+	exponent += 62;
+	float24 |= (exponent << 16);
+	/* Kill 7 LSB of mantissa */
+	float24 |= (radeonPackFloat32(mantissa) & 0x7FFFFF) >> 7;
+
+	return float24;
+}
+
+GLboolean radeonInitContext(radeonContextPtr radeon,
+			    struct dd_function_table* functions,
+			    const __GLcontextModes * glVisual,
+			    __DRIcontextPrivate * driContextPriv,
+			    void *sharedContextPrivate);
+
+void radeonCleanupContext(radeonContextPtr radeon);
+GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
+void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable);
+GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
+			    __DRIdrawablePrivate * driDrawPriv,
+			    __DRIdrawablePrivate * driReadPriv);
+extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv);
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_compat.c b/src/mesa/drivers/dri/radeon/radeon_compat.c
deleted file mode 100644
index 46b490d61f7..00000000000
--- a/src/mesa/drivers/dri/radeon/radeon_compat.c
+++ /dev/null
@@ -1,301 +0,0 @@
-/**************************************************************************
-
-Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
-               Tungsten Graphics Inc., Austin, Texas.
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the "Software"),
-to deal in the Software without restriction, including without limitation
-on the rights to use, copy, modify, merge, publish, distribute, sub
-license, and/or sell copies of the Software, and to permit persons to whom
-the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice (including the next
-paragraph) shall be included in all copies or substantial portions of the
-Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
-DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
-OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
-USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- *   Keith Whitwell <keith@tungstengraphics.com>
- *
- */
-
-#include "main/glheader.h"
-#include "main/imports.h"
-
-#include "radeon_context.h"
-#include "radeon_state.h"
-#include "radeon_ioctl.h"
-
-
-static struct { 
-	int start; 
-	int len; 
-	const char *name;
-} packet[RADEON_MAX_STATE_PACKETS] = {
-	{ RADEON_PP_MISC,7,"RADEON_PP_MISC" },
-	{ RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
-	{ RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
-	{ RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
-	{ RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
-	{ RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
-	{ RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
-	{ RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
-	{ RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
-	{ RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
-	{ RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
-	{ RADEON_RE_MISC,1,"RADEON_RE_MISC" },
-	{ RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
-	{ RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
-	{ RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
-	{ RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
-	{ RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
-	{ RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
-	{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
-	{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
-	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
-};
-
-
-static void radeonCompatEmitPacket( radeonContextPtr rmesa, 
-				    struct radeon_state_atom *state )
-{
-   drm_radeon_sarea_t *sarea = rmesa->sarea;
-   drm_radeon_context_regs_t *ctx = &sarea->context_state;
-   drm_radeon_texture_regs_t *tex0 = &sarea->tex_state[0];
-   drm_radeon_texture_regs_t *tex1 = &sarea->tex_state[1];
-   int i;
-   int *buf = state->cmd;
-
-   for ( i = 0 ; i < state->cmd_size ; ) {
-      drm_radeon_cmd_header_t *header = (drm_radeon_cmd_header_t *)&buf[i++];
-
-      if (RADEON_DEBUG & DEBUG_STATE)
-	 fprintf(stderr, "%s %d: %s\n", __FUNCTION__, header->packet.packet_id,
-		 packet[(int)header->packet.packet_id].name);
-
-      switch (header->packet.packet_id) {
-      case RADEON_EMIT_PP_MISC:
-	 ctx->pp_misc = buf[i++]; 
-	 ctx->pp_fog_color = buf[i++];
-	 ctx->re_solid_color = buf[i++];
-	 ctx->rb3d_blendcntl = buf[i++];
-	 ctx->rb3d_depthoffset = buf[i++];
-	 ctx->rb3d_depthpitch = buf[i++];
-	 ctx->rb3d_zstencilcntl = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
-	 break;
-      case RADEON_EMIT_PP_CNTL:
-	 ctx->pp_cntl = buf[i++];
-	 ctx->rb3d_cntl = buf[i++];
-	 ctx->rb3d_coloroffset = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
-	 break;
-      case RADEON_EMIT_RB3D_COLORPITCH:
-	 ctx->rb3d_colorpitch = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
-	 break;
-      case RADEON_EMIT_RE_LINE_PATTERN:
-	 ctx->re_line_pattern = buf[i++];
-	 ctx->re_line_state = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_LINE;
-	 break;
-      case RADEON_EMIT_SE_LINE_WIDTH:
-	 ctx->se_line_width = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_LINE;
-	 break;
-      case RADEON_EMIT_PP_LUM_MATRIX:
-	 ctx->pp_lum_matrix = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
-	 break;
-      case RADEON_EMIT_PP_ROT_MATRIX_0:
-	 ctx->pp_rot_matrix_0 = buf[i++];
-	 ctx->pp_rot_matrix_1 = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
-	 break;
-      case RADEON_EMIT_RB3D_STENCILREFMASK:
-	 ctx->rb3d_stencilrefmask = buf[i++];
-	 ctx->rb3d_ropcntl = buf[i++];
-	 ctx->rb3d_planemask = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_MASKS;
-	 break;
-      case RADEON_EMIT_SE_VPORT_XSCALE:
-	 ctx->se_vport_xscale = buf[i++];
-	 ctx->se_vport_xoffset = buf[i++];
-	 ctx->se_vport_yscale = buf[i++];
-	 ctx->se_vport_yoffset = buf[i++];
-	 ctx->se_vport_zscale = buf[i++];
-	 ctx->se_vport_zoffset = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_VIEWPORT;
-	 break;
-      case RADEON_EMIT_SE_CNTL:
-	 ctx->se_cntl = buf[i++];
-	 ctx->se_coord_fmt = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_VERTFMT;
-	 break;
-      case RADEON_EMIT_SE_CNTL_STATUS:
-	 ctx->se_cntl_status = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_SETUP;
-	 break;
-      case RADEON_EMIT_RE_MISC:
-	 ctx->re_misc = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_MISC;
-	 break;
-      case RADEON_EMIT_PP_TXFILTER_0:
-	 tex0->pp_txfilter = buf[i++];
-	 tex0->pp_txformat = buf[i++];
-	 tex0->pp_txoffset = buf[i++];
-	 tex0->pp_txcblend = buf[i++];
-	 tex0->pp_txablend = buf[i++];
-	 tex0->pp_tfactor = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_TEX0;
-	 break;
-      case RADEON_EMIT_PP_BORDER_COLOR_0:
-	 tex0->pp_border_color = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_TEX0;
-	 break;
-      case RADEON_EMIT_PP_TXFILTER_1:
-	 tex1->pp_txfilter = buf[i++];
-	 tex1->pp_txformat = buf[i++];
-	 tex1->pp_txoffset = buf[i++];
-	 tex1->pp_txcblend = buf[i++];
-	 tex1->pp_txablend = buf[i++];
-	 tex1->pp_tfactor = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_TEX1;
-	 break;
-      case RADEON_EMIT_PP_BORDER_COLOR_1:
-	 tex1->pp_border_color = buf[i++];
-	 sarea->dirty |= RADEON_UPLOAD_TEX1;
-	 break;
-
-      case RADEON_EMIT_SE_ZBIAS_FACTOR:
-	 i++;
-	 i++;
-	 break;
-
-      case RADEON_EMIT_PP_TXFILTER_2:
-      case RADEON_EMIT_PP_BORDER_COLOR_2:
-      case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
-      case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
-      default:
-	 /* These states aren't understood by radeon drm 1.1 */
-	 fprintf(stderr, "Tried to emit unsupported state\n");
-	 return;
-      }
-   }
-}
-
-
-
-static void radeonCompatEmitStateLocked( radeonContextPtr rmesa )
-{
-   struct radeon_state_atom *atom;
-
-   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
-      return;
-
-   foreach(atom, &rmesa->hw.atomlist) {
-      if (rmesa->hw.all_dirty)
-	 atom->dirty = GL_TRUE;
-      if (atom->is_tcl)
-	 atom->dirty = GL_FALSE;
-      if (atom->dirty)
-	 radeonCompatEmitPacket(rmesa, atom);
-   }
- 
-   rmesa->hw.is_dirty = GL_FALSE;
-   rmesa->hw.all_dirty = GL_FALSE;
-}
-
-
-static void radeonCompatEmitPrimitiveLocked( radeonContextPtr rmesa,
-					     GLuint hw_primitive,
-					     GLuint nverts,
-					     drm_clip_rect_t *pbox,
-					     GLuint nbox )
-{
-   int i;
-
-   for ( i = 0 ; i < nbox ; ) {
-      int nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, nbox );
-      drm_clip_rect_t *b = rmesa->sarea->boxes;
-      drm_radeon_vertex_t vtx;
-      
-      rmesa->sarea->dirty |= RADEON_UPLOAD_CLIPRECTS;
-      rmesa->sarea->nbox = nr - i;
-
-      for ( ; i < nr ; i++) 
-	 *b++ = pbox[i];
-      
-      if (RADEON_DEBUG & DEBUG_IOCTL)
-	 fprintf(stderr, 
-		 "RadeonFlushVertexBuffer: prim %x buf %d verts %d "
-		 "disc %d nbox %d\n",
-		 hw_primitive, 
-		 rmesa->dma.current.buf->buf->idx, 
-		 nverts, 
-		 nr == nbox,
-		 rmesa->sarea->nbox );
-
-      vtx.prim = hw_primitive;
-      vtx.idx = rmesa->dma.current.buf->buf->idx;
-      vtx.count = nverts;
-      vtx.discard = (nr == nbox);      
-
-      drmCommandWrite( rmesa->dri.fd, 
-		       DRM_RADEON_VERTEX,
-		       &vtx, sizeof(vtx));
-   }
-}
-
-
-
-/* No 'start' for 1.1 vertices ioctl: only one vertex prim/buffer!  
- */
-void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
-				GLuint vertex_format,
-				GLuint hw_primitive,
-				GLuint nrverts )
-{
-   if (RADEON_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   LOCK_HARDWARE( rmesa );
-
-   radeonCompatEmitStateLocked( rmesa );
-   rmesa->sarea->vc_format = vertex_format;
-   
-   if (rmesa->state.scissor.enabled) {
-      radeonCompatEmitPrimitiveLocked( rmesa, 
-				       hw_primitive,
-				       nrverts,
-				       rmesa->state.scissor.pClipRects,
-				       rmesa->state.scissor.numClipRects );
-   }
-   else {
-      radeonCompatEmitPrimitiveLocked( rmesa, 
-				       hw_primitive,
-				       nrverts,
-				       rmesa->pClipRects,
-				       rmesa->numClipRects );
-   }
-
-
-   UNLOCK_HARDWARE( rmesa );
-}
-
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
index b67bda7d06a..8f4485aee7c 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -53,6 +53,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "drivers/common/driverfuncs.h"
 
+#include "radeon_common.h"
 #include "radeon_context.h"
 #include "radeon_ioctl.h"
 #include "radeon_state.h"
@@ -61,10 +62,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_swtcl.h"
 #include "radeon_tcl.h"
 #include "radeon_maos.h"
+#include "radeon_queryobj.h"
 
+#define need_GL_ARB_occlusion_query
 #define need_GL_EXT_blend_minmax
 #define need_GL_EXT_fog_coord
 #define need_GL_EXT_secondary_color
+#define need_GL_EXT_framebuffer_object
 #include "extension_helper.h"
 
 #define DRIVER_DATE	"20061018"
@@ -72,46 +76,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "vblank.h"
 #include "utils.h"
 #include "xmlpool.h" /* for symbolic values of enum-type options */
-#ifndef RADEON_DEBUG
-int RADEON_DEBUG = (0);
-#endif
-
-
-/* Return various strings for glGetString().
- */
-static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   static char buffer[128];
-   unsigned   offset;
-   GLuint agp_mode = (rmesa->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
-      rmesa->radeonScreen->AGPMode;
-
-   switch ( name ) {
-   case GL_VENDOR:
-      return (GLubyte *)"Tungsten Graphics, Inc.";
-
-   case GL_RENDERER:
-      offset = driGetRendererString( buffer, "Radeon", DRIVER_DATE,
-				     agp_mode );
-
-      sprintf( & buffer[ offset ], " %sTCL",
-	       !(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
-	       ? "" : "NO-" );
-
-      return (GLubyte *)buffer;
-
-   default:
-      return NULL;
-   }
-}
-
 
 /* Extension strings exported by the R100 driver.
  */
 const struct dri_extension card_extensions[] =
 {
     { "GL_ARB_multitexture",               NULL },
+    { "GL_ARB_occlusion_query",		   GL_ARB_occlusion_query_functions},
     { "GL_ARB_texture_border_clamp",       NULL },
     { "GL_ARB_texture_env_add",            NULL },
     { "GL_ARB_texture_env_combine",        NULL },
@@ -121,6 +92,7 @@ const struct dri_extension card_extensions[] =
     { "GL_EXT_blend_logic_op",             NULL },
     { "GL_EXT_blend_subtract",             GL_EXT_blend_minmax_functions },
     { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
+    { "GL_EXT_packed_depth_stencil",	   NULL},
     { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
     { "GL_EXT_stencil_wrap",               NULL },
     { "GL_EXT_texture_edge_clamp",         NULL },
@@ -137,6 +109,11 @@ const struct dri_extension card_extensions[] =
     { NULL,                                NULL }
 };
 
+const struct dri_extension mm_extensions[] = {
+  { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
+  { NULL, NULL }
+};
+
 extern const struct tnl_pipeline_stage _radeon_render_stage;
 extern const struct tnl_pipeline_stage _radeon_tcl_stage;
 
@@ -160,47 +137,85 @@ static const struct tnl_pipeline_stage *radeon_pipeline[] = {
    NULL,
 };
 
+static void r100_get_lock(radeonContextPtr radeon)
+{
+   r100ContextPtr rmesa = (r100ContextPtr)radeon;
+   drm_radeon_sarea_t *sarea = radeon->sarea;
 
+   RADEON_STATECHANGE(rmesa, ctx);
+   if (rmesa->radeon.sarea->tiling_enabled) {
+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
+	 RADEON_COLOR_TILE_ENABLE;
+   } else {
+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &=
+	 ~RADEON_COLOR_TILE_ENABLE;
+   }
+   
+   if (sarea->ctx_owner != rmesa->radeon.dri.hwContext) {
+      sarea->ctx_owner = rmesa->radeon.dri.hwContext;
+      
+      if (!radeon->radeonScreen->kernel_mm)
+         radeon_bo_legacy_texture_age(radeon->radeonScreen->bom);
+   }
+}
 
-/* Initialize the driver's misc functions.
- */
-static void radeonInitDriverFuncs( struct dd_function_table *functions )
+static void r100_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
 {
-    functions->GetString	= radeonGetString;
 }
 
-static const struct dri_debug_control debug_control[] =
+static void r100_vtbl_pre_emit_state(radeonContextPtr radeon)
 {
-    { "fall",  DEBUG_FALLBACKS },
-    { "tex",   DEBUG_TEXTURE },
-    { "ioctl", DEBUG_IOCTL },
-    { "prim",  DEBUG_PRIMS },
-    { "vert",  DEBUG_VERTS },
-    { "state", DEBUG_STATE },
-    { "code",  DEBUG_CODEGEN },
-    { "vfmt",  DEBUG_VFMT },
-    { "vtxf",  DEBUG_VFMT },
-    { "verb",  DEBUG_VERBOSE },
-    { "dri",   DEBUG_DRI },
-    { "dma",   DEBUG_DMA },
-    { "san",   DEBUG_SANITY },
-    { "sync",  DEBUG_SYNC },
-    { NULL,    0 }
-};
+   r100ContextPtr rmesa = (r100ContextPtr)radeon;
+   
+   /* r100 always needs to emit ZBS to avoid TCL lockups */
+   rmesa->hw.zbs.dirty = 1;
+   radeon->hw.is_dirty = 1;
+}
+
+static void r100_vtbl_free_context(GLcontext *ctx)
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   _mesa_vector4f_free( &rmesa->tcl.ObjClean );
+}
 
+static void r100_emit_query_finish(radeonContextPtr radeon)
+{
+   BATCH_LOCALS(radeon);
+   struct radeon_query_object *query = radeon->query.current;
+
+   BEGIN_BATCH_NO_AUTOSTATE(4);
+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZPASS_ADDR, 0));
+   OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+   END_BATCH();
+   query->curr_offset += sizeof(uint32_t);
+   assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+   query->emitted_begin = GL_FALSE;
+}
+
+static void r100_init_vtbl(radeonContextPtr radeon)
+{
+   radeon->vtbl.get_lock = r100_get_lock;
+   radeon->vtbl.update_viewport_offset = radeonUpdateViewportOffset;
+   radeon->vtbl.emit_cs_header = r100_vtbl_emit_cs_header;
+   radeon->vtbl.swtcl_flush = r100_swtcl_flush;
+   radeon->vtbl.pre_emit_state = r100_vtbl_pre_emit_state;
+   radeon->vtbl.fallback = radeonFallback;
+   radeon->vtbl.free_context = r100_vtbl_free_context;
+   radeon->vtbl.emit_query_finish = r100_emit_query_finish;
+}
 
 /* Create the device specific context.
  */
 GLboolean
-radeonCreateContext( const __GLcontextModes *glVisual,
+r100CreateContext( const __GLcontextModes *glVisual,
                      __DRIcontextPrivate *driContextPriv,
                      void *sharedContextPrivate)
 {
    __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
    radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private);
    struct dd_function_table functions;
-   radeonContextPtr rmesa;
-   GLcontext *ctx, *shareCtx;
+   r100ContextPtr rmesa;
+   GLcontext *ctx;
    int i;
    int tcl_mode, fthrottle_mode;
 
@@ -209,10 +224,12 @@ radeonCreateContext( const __GLcontextModes *glVisual,
    assert(screen);
 
    /* Allocate the Radeon context */
-   rmesa = (radeonContextPtr) CALLOC( sizeof(*rmesa) );
+   rmesa = (r100ContextPtr) CALLOC( sizeof(*rmesa) );
    if ( !rmesa )
       return GL_FALSE;
 
+   r100_init_vtbl(&rmesa->radeon);
+
    /* init exp fog table data */
    radeonInitStaticFogData();
    
@@ -220,12 +237,12 @@ radeonCreateContext( const __GLcontextModes *glVisual,
     * Do this here so that initialMaxAnisotropy is set before we create
     * the default textures.
     */
-   driParseConfigFiles (&rmesa->optionCache, &screen->optionCache,
+   driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
 			screen->driScreen->myNum, "radeon");
-   rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache,
+   rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
                                                  "def_max_anisotropy");
 
-   if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
+   if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
       if ( sPriv->drm_version.minor < 13 )
 	 fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
 			  "disabling.\n", sPriv->drm_version.minor );
@@ -240,65 +257,18 @@ radeonCreateContext( const __GLcontextModes *glVisual,
     * (the texture functions are especially important)
     */
    _mesa_init_driver_functions( &functions );
-   radeonInitDriverFuncs( &functions );
    radeonInitTextureFuncs( &functions );
+   radeonInitQueryObjFunctions(&functions);
 
-   /* Allocate the Mesa context */
-   if (sharedContextPrivate)
-      shareCtx = ((radeonContextPtr) sharedContextPrivate)->glCtx;
-   else
-      shareCtx = NULL;
-   rmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
-                                       &functions, (void *) rmesa);
-   if (!rmesa->glCtx) {
-      FREE(rmesa);
-      return GL_FALSE;
-   }
-   driContextPriv->driverPrivate = rmesa;
-
-   /* Init radeon context data */
-   rmesa->dri.context = driContextPriv;
-   rmesa->dri.screen = sPriv;
-   rmesa->dri.drawable = NULL;
-   rmesa->dri.readable = NULL;
-   rmesa->dri.hwContext = driContextPriv->hHWContext;
-   rmesa->dri.hwLock = &sPriv->pSAREA->lock;
-   rmesa->dri.fd = sPriv->fd;
-   rmesa->dri.drmMinor = sPriv->drm_version.minor;
-
-   rmesa->radeonScreen = screen;
-   rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA +
-				       screen->sarea_priv_offset);
-
-
-   rmesa->dma.buf0_address = rmesa->radeonScreen->buffers->list[0].address;
-
-   (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) );
-   make_empty_list( & rmesa->swapped );
-
-   rmesa->nr_heaps = screen->numTexHeaps;
-   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
-      rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa,
-	    screen->texSize[i],
-	    12,
-	    RADEON_NR_TEX_REGIONS,
-	    (drmTextureRegionPtr)rmesa->sarea->tex_list[i],
-	    & rmesa->sarea->tex_age[i],
-	    & rmesa->swapped,
-	    sizeof( radeonTexObj ),
-	    (destroy_texture_object_t *) radeonDestroyTexObj );
-
-      driSetTextureSwapCounterLocation( rmesa->texture_heaps[i],
-					& rmesa->c_textureSwaps );
+   if (!radeonInitContext(&rmesa->radeon, &functions,
+			  glVisual, driContextPriv,
+			  sharedContextPrivate)) {
+     FREE(rmesa);
+     return GL_FALSE;
    }
-   rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache,
-					   "texture_depth");
-   if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
-      rmesa->texture_depth = ( screen->cpp == 4 ) ?
-	 DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
 
-   rmesa->swtcl.RenderIndex = ~0;
-   rmesa->hw.all_dirty = GL_TRUE;
+   rmesa->radeon.swtcl.RenderIndex = ~0;
+   rmesa->radeon.hw.all_dirty = GL_TRUE;
 
    /* Set the maximum texture size small enough that we can guarentee that
     * all texture units can bind a maximal texture and have all of them in
@@ -306,26 +276,20 @@ radeonCreateContext( const __GLcontextModes *glVisual,
     * setting allow larger textures.
     */
 
-   ctx = rmesa->glCtx;
-   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache,
+   ctx = rmesa->radeon.glCtx;
+   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
 						 "texture_units");
    ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
    ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
 
-   i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures");
-
-   driCalculateMaxTextureLevels( rmesa->texture_heaps,
-				 rmesa->nr_heaps,
-				 & ctx->Const,
-				 4,
-				 11, /* max 2D texture size is 2048x2048 */
-				 8,  /* 256^3 */
-				 9,  /* \todo: max cube texture size seems to be 512x512(x6) */
-				 11, /* max rect texture size is 2048x2048. */
-				 12,
-				 GL_FALSE,
-				 i );
+   i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures");
 
+   /* FIXME: When no memory manager is available we should set this 
+    * to some reasonable value based on texture memory pool size */
+   ctx->Const.MaxTextureLevels = 12;
+   ctx->Const.Max3DTextureLevels = 9;
+   ctx->Const.MaxCubeTextureLevels = 12;
+   ctx->Const.MaxTextureRectSize = 2048;
 
    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
 
@@ -390,38 +354,42 @@ radeonCreateContext( const __GLcontextModes *glVisual,
    }
 
    driInitExtensions( ctx, card_extensions, GL_TRUE );
-   if (rmesa->radeonScreen->drmSupportsCubeMapsR100)
+   if (rmesa->radeon.radeonScreen->kernel_mm)
+     driInitExtensions(ctx, mm_extensions, GL_FALSE);
+   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
       _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
-   if (rmesa->glCtx->Mesa_DXTn) {
+   if (rmesa->radeon.glCtx->Mesa_DXTn) {
       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
       _mesa_enable_extension( ctx, "GL_S3_s3tc" );
    }
-   else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) {
+   else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
    }
 
-   if (rmesa->dri.drmMinor >= 9)
+   if (rmesa->radeon.radeonScreen->kernel_mm || rmesa->radeon.dri.drmMinor >= 9)
       _mesa_enable_extension( ctx, "GL_NV_texture_rectangle");
 
+   if (!rmesa->radeon.radeonScreen->kernel_mm)
+      _mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
+
    /* XXX these should really go right after _mesa_init_driver_functions() */
-   radeonInitIoctlFuncs( ctx );
-   radeonInitStateFuncs( ctx );
+   radeon_fbo_init(&rmesa->radeon);
    radeonInitSpanFuncs( ctx );
+   radeonInitIoctlFuncs( ctx );
+   radeonInitStateFuncs( ctx , rmesa->radeon.radeonScreen->kernel_mm );
    radeonInitState( rmesa );
    radeonInitSwtcl( ctx );
 
    _mesa_vector4f_alloc( &rmesa->tcl.ObjClean, 0, 
 			 ctx->Const.MaxArrayLockSize, 32 );
 
-   fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode");
-   rmesa->iw.irq_seq = -1;
-   rmesa->irqsEmitted = 0;
-   rmesa->do_irqs = (rmesa->radeonScreen->irq != 0 &&
-		     fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
-
-   rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
+   fthrottle_mode = driQueryOptioni(&rmesa->radeon.optionCache, "fthrottle_mode");
+   rmesa->radeon.iw.irq_seq = -1;
+   rmesa->radeon.irqsEmitted = 0;
+   rmesa->radeon.do_irqs = (rmesa->radeon.radeonScreen->irq != 0 &&
+			    fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
 
-   (*sPriv->systemTime->getUST)( & rmesa->swap_ust );
+   rmesa->radeon.do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
 
 
 #if DO_DEBUG
@@ -429,206 +397,21 @@ radeonCreateContext( const __GLcontextModes *glVisual,
 				       debug_control );
 #endif
 
-   tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode");
-   if (driQueryOptionb(&rmesa->optionCache, "no_rast")) {
+   tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
+   if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
       fprintf(stderr, "disabling 3D acceleration\n");
       FALLBACK(rmesa, RADEON_FALLBACK_DISABLE, 1);
    } else if (tcl_mode == DRI_CONF_TCL_SW ||
-	      !(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
-      if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
-	 rmesa->radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
+	      !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+      if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+	 rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
 	 fprintf(stderr, "Disabling HW TCL support\n");
       }
-      TCL_FALLBACK(rmesa->glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
+      TCL_FALLBACK(rmesa->radeon.glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
    }
 
-   if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+   if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
 /*       _tnl_need_dlist_norm_lengths( ctx, GL_FALSE ); */
    }
    return GL_TRUE;
 }
-
-
-/* Destroy the device specific context.
- */
-/* Destroy the Mesa and driver specific context data.
- */
-void radeonDestroyContext( __DRIcontextPrivate *driContextPriv )
-{
-   GET_CURRENT_CONTEXT(ctx);
-   radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
-   radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
-
-   /* check if we're deleting the currently bound context */
-   if (rmesa == current) {
-      RADEON_FIREVERTICES( rmesa );
-      _mesa_make_current(NULL, NULL, NULL);
-   }
-
-   /* Free radeon context resources */
-   assert(rmesa); /* should never be null */
-   if ( rmesa ) {
-      GLboolean   release_texture_heaps;
-
-
-      release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1);
-      _swsetup_DestroyContext( rmesa->glCtx );
-      _tnl_DestroyContext( rmesa->glCtx );
-      _vbo_DestroyContext( rmesa->glCtx );
-      _swrast_DestroyContext( rmesa->glCtx );
-
-      radeonDestroySwtcl( rmesa->glCtx );
-      radeonReleaseArrays( rmesa->glCtx, ~0 );
-      if (rmesa->dma.current.buf) {
-	 radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
-	 radeonFlushCmdBuf( rmesa, __FUNCTION__ );
-      }
-
-      _mesa_vector4f_free( &rmesa->tcl.ObjClean );
-
-      if (rmesa->state.scissor.pClipRects) {
-	 FREE(rmesa->state.scissor.pClipRects);
-	 rmesa->state.scissor.pClipRects = NULL;
-      }
-
-      if ( release_texture_heaps ) {
-         /* This share group is about to go away, free our private
-          * texture object data.
-          */
-         int i;
-
-         for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
-	    driDestroyTextureHeap( rmesa->texture_heaps[ i ] );
-	    rmesa->texture_heaps[ i ] = NULL;
-         }
-
-	 assert( is_empty_list( & rmesa->swapped ) );
-      }
-
-      /* free the Mesa context */
-      rmesa->glCtx->DriverCtx = NULL;
-      _mesa_destroy_context( rmesa->glCtx );
-
-      /* free the option cache */
-      driDestroyOptionCache (&rmesa->optionCache);
-
-      FREE( rmesa );
-   }
-}
-
-
-
-
-void
-radeonSwapBuffers( __DRIdrawablePrivate *dPriv )
-{
-
-   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
-      radeonContextPtr rmesa;
-      GLcontext *ctx;
-      rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
-      ctx = rmesa->glCtx;
-      if (ctx->Visual.doubleBufferMode) {
-         _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
-
-         if ( rmesa->doPageFlip ) {
-            radeonPageFlip( dPriv );
-         }
-         else {
-	     radeonCopyBuffer( dPriv, NULL );
-         }
-      }
-   }
-   else {
-      /* XXX this shouldn't be an error but we can't handle it for now */
-      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
-   }
-}
-
-void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
-			 int x, int y, int w, int h )
-{
-    if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
-	radeonContextPtr radeon;
-	GLcontext *ctx;
-
-	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
-	ctx = radeon->glCtx;
-
-	if (ctx->Visual.doubleBufferMode) {
-	    drm_clip_rect_t rect;
-	    rect.x1 = x + dPriv->x;
-	    rect.y1 = (dPriv->h - y - h) + dPriv->y;
-	    rect.x2 = rect.x1 + w;
-	    rect.y2 = rect.y1 + h;
-	    _mesa_notifySwapBuffers(ctx);	/* flush pending rendering comands */
-	    radeonCopyBuffer(dPriv, &rect);
-	}
-    } else {
-	/* XXX this shouldn't be an error but we can't handle it for now */
-	_mesa_problem(NULL, "%s: drawable has no context!",
-		      __FUNCTION__);
-    }
-}
-
-/* Make context `c' the current context and bind it to the given
- * drawing and reading surfaces.
- */
-GLboolean
-radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
-                   __DRIdrawablePrivate *driDrawPriv,
-                   __DRIdrawablePrivate *driReadPriv )
-{
-   if ( driContextPriv ) {
-      radeonContextPtr newCtx = 
-	 (radeonContextPtr) driContextPriv->driverPrivate;
-
-      if (RADEON_DEBUG & DEBUG_DRI)
-	 fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) newCtx->glCtx);
-
-      newCtx->dri.readable = driReadPriv;
-
-      if ( (newCtx->dri.drawable != driDrawPriv) ||
-           newCtx->lastStamp != driDrawPriv->lastStamp ) {
-	 if (driDrawPriv->swap_interval == (unsigned)-1) {
-	    driDrawPriv->vblFlags = (newCtx->radeonScreen->irq != 0)
-	       ? driGetDefaultVBlankFlags(&newCtx->optionCache)
-	       : VBLANK_FLAG_NO_IRQ;
-
-	    driDrawableInitVBlank( driDrawPriv );
-	 }
-
-	 newCtx->dri.drawable = driDrawPriv;
-
-	 radeonSetCliprects(newCtx);
-	 radeonUpdateViewportOffset( newCtx->glCtx );
-      }
-
-      _mesa_make_current( newCtx->glCtx,
-			  (GLframebuffer *) driDrawPriv->driverPrivate,
-			  (GLframebuffer *) driReadPriv->driverPrivate );
-
-      _mesa_update_state( newCtx->glCtx );
-   } else {
-      if (RADEON_DEBUG & DEBUG_DRI)
-	 fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
-      _mesa_make_current( NULL, NULL, NULL );
-   }
-
-   if (RADEON_DEBUG & DEBUG_DRI)
-      fprintf(stderr, "End %s\n", __FUNCTION__);
-   return GL_TRUE;
-}
-
-/* Force the context `c' to be unbound from its buffer.
- */
-GLboolean
-radeonUnbindContext( __DRIcontextPrivate *driContextPriv )
-{
-   radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
-
-   if (RADEON_DEBUG & DEBUG_DRI)
-      fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) rmesa->glCtx);
-
-   return GL_TRUE;
-}
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h
index 53df766f8cb..4e2c52c835c 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_context.h
@@ -48,91 +48,23 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "drm.h"
 #include "radeon_drm.h"
 #include "texmem.h"
-
 #include "main/macros.h"
 #include "main/mtypes.h"
 #include "main/colormac.h"
-
-struct radeon_context;
-typedef struct radeon_context radeonContextRec;
-typedef struct radeon_context *radeonContextPtr;
-
-/* This union is used to avoid warnings/miscompilation
-   with float to uint32_t casts due to strict-aliasing */
-typedef union {
-	GLfloat f;
-	uint32_t ui32;
-} float_ui32_type;
-
-#include "radeon_lock.h"
 #include "radeon_screen.h"
-#include "main/mm.h"
-
-#include "math/m_vector.h"
-
-#define TEX_0   0x1
-#define TEX_1   0x2
-#define TEX_2   0x4
-#define TEX_ALL 0x7
-
-/* Rasterizing fallbacks */
-/* See correponding strings in r200_swtcl.c */
-#define RADEON_FALLBACK_TEXTURE		0x0001
-#define RADEON_FALLBACK_DRAW_BUFFER	0x0002
-#define RADEON_FALLBACK_STENCIL		0x0004
-#define RADEON_FALLBACK_RENDER_MODE	0x0008
-#define RADEON_FALLBACK_BLEND_EQ	0x0010
-#define RADEON_FALLBACK_BLEND_FUNC	0x0020
-#define RADEON_FALLBACK_DISABLE 	0x0040
-#define RADEON_FALLBACK_BORDER_MODE	0x0080
-
-/* The blit width for texture uploads
- */
-#define BLIT_WIDTH_BYTES 1024
 
-/* Use the templated vertex format:
- */
-#define COLOR_IS_RGBA
-#define TAG(x) radeon##x
-#include "tnl_dd/t_dd_vertex.h"
-#undef TAG
-
-typedef void (*radeon_tri_func) (radeonContextPtr,
-				 radeonVertex *,
-				 radeonVertex *, radeonVertex *);
-
-typedef void (*radeon_line_func) (radeonContextPtr,
-				  radeonVertex *, radeonVertex *);
+#include "radeon_common.h"
 
-typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *);
-
-struct radeon_colorbuffer_state {
-	GLuint clear;
-	int roundEnable;
-};
 
-struct radeon_depthbuffer_state {
-	GLuint clear;
-	GLfloat scale;
-};
+struct r100_context;
+typedef struct r100_context r100ContextRec;
+typedef struct r100_context *r100ContextPtr;
 
-struct radeon_scissor_state {
-	drm_clip_rect_t rect;
-	GLboolean enabled;
+#include "radeon_lock.h"
 
-	GLuint numClipRects;	/* Cliprects active */
-	GLuint numAllocedClipRects;	/* Cliprects available */
-	drm_clip_rect_t *pClipRects;
-};
 
-struct radeon_stencilbuffer_state {
-	GLboolean hwBuffer;
-	GLuint clear;		/* rb3d_stencilrefmask value */
-};
 
-struct radeon_stipple_state {
-	GLuint mask[32];
-};
+#define R100_TEX_ALL 0x7
 
 /* used for both tcl_vtx and vc_frmt tex bits (they are identical) */
 #define RADEON_ST_BIT(unit) \
@@ -141,42 +73,6 @@ struct radeon_stipple_state {
 #define RADEON_Q_BIT(unit) \
 (unit == 0 ? RADEON_CP_VC_FRMT_Q0 : (RADEON_CP_VC_FRMT_Q1 >> 2) << (2 * unit))
 
-typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
-
-/* Texture object in locally shared texture space.
- */
-struct radeon_tex_obj {
-	driTextureObject base;
-
-	GLuint bufAddr;		/* Offset to start of locally
-				   shared texture block */
-
-	GLuint dirty_state;	/* Flags (1 per texunit) for
-				   whether or not this texobj
-				   has dirty hardware state
-				   (pp_*) that needs to be
-				   brought into the
-				   texunit. */
-
-	drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
-	/* Six, for the cube faces */
-
-	GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
-
-	GLuint pp_txfilter;	/* hardware register values */
-	GLuint pp_txformat;
-	GLuint pp_txoffset;	/* Image location in texmem.
-				   All cube faces follow. */
-	GLuint pp_txsize;	/* npot only */
-	GLuint pp_txpitch;	/* npot only */
-	GLuint pp_border_color;
-	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
-
-	GLboolean border_fallback;
-
-	GLuint tile_bits;	/* hw texture tile bits used on this texture */
-};
-
 struct radeon_texture_env_state {
 	radeonTexObjPtr texobj;
 	GLenum format;
@@ -187,17 +83,6 @@ struct radeon_texture_state {
 	struct radeon_texture_env_state unit[RADEON_MAX_TEXTURE_UNITS];
 };
 
-struct radeon_state_atom {
-	struct radeon_state_atom *next, *prev;
-	const char *name;	/* for debug */
-	int cmd_size;		/* size in bytes */
-	GLuint is_tcl;
-	int *cmd;		/* one or more cmd's */
-	int *lastcmd;		/* one or more cmd's */
-	GLboolean dirty;	/* dirty-mark in emit_state_list */
-	 GLboolean(*check) (GLcontext *);	/* is this state active? */
-};
-
 /* Trying to keep these relatively short as the variables are becoming
  * extravagently long.  Drop the driver name prefix off the front of
  * everything - I think we know which driver we're in by now, and keep the
@@ -410,10 +295,16 @@ struct radeon_state_atom {
 #define SHN_SHININESS      1
 #define SHN_STATE_SIZE     2
 
-struct radeon_hw_state {
-	/* Head of the linked list of state atoms. */
-	struct radeon_state_atom atomlist;
+#define R100_QUERYOBJ_CMD_0  0
+#define R100_QUERYOBJ_DATA_0 1
+#define R100_QUERYOBJ_CMDSIZE  2
+
+#define STP_CMD_0 0
+#define STP_DATA_0 1
+#define STP_CMD_1 2
+#define STP_STATE_SIZE 35
 
+struct r100_hw_state {
 	/* Hardware state, stored as cmdbuf commands:  
 	 *   -- Need to doublebuffer for
 	 *           - eliding noop statechange loops? (except line stipple count)
@@ -437,90 +328,19 @@ struct radeon_hw_state {
 	struct radeon_state_atom fog;
 	struct radeon_state_atom glt;
 	struct radeon_state_atom txr[3];	/* for NPOT */
-
-	int max_state_size;	/* Number of bytes necessary for a full state emit. */
-	GLboolean is_dirty, all_dirty;
-};
-
-struct radeon_state {
-	/* Derived state for internal purposes:
-	 */
-	struct radeon_colorbuffer_state color;
-	struct radeon_depthbuffer_state depth;
-	struct radeon_scissor_state scissor;
-	struct radeon_stencilbuffer_state stencil;
-	struct radeon_stipple_state stipple;
-	struct radeon_texture_state texture;
-};
-
-/* Need refcounting on dma buffers:
- */
-struct radeon_dma_buffer {
-	int refcount;		/* the number of retained regions in buf */
-	drmBufPtr buf;
-};
-
-#define GET_START(rvb) (rmesa->radeonScreen->gart_buffer_offset +			\
-			(rvb)->address - rmesa->dma.buf0_address +	\
-			(rvb)->start)
-
-/* A retained region, eg vertices for indexed vertices.
- */
-struct radeon_dma_region {
-	struct radeon_dma_buffer *buf;
-	char *address;		/* == buf->address */
-	int start, end, ptr;	/* offsets from start of buf */
-	int aos_start;
-	int aos_stride;
-	int aos_size;
-};
-
-struct radeon_dma {
-	/* Active dma region.  Allocations for vertices and retained
-	 * regions come from here.  Also used for emitting random vertices,
-	 * these may be flushed by calling flush_current();
-	 */
-	struct radeon_dma_region current;
-
-	void (*flush) (radeonContextPtr);
-
-	char *buf0_address;	/* start of buf[0], for index calcs */
-	GLuint nr_released_bufs;	/* flush after so many buffers released */
+	struct radeon_state_atom stp;
 };
 
-struct radeon_dri_mirror {
-	__DRIcontextPrivate *context;	/* DRI context */
-	__DRIscreenPrivate *screen;	/* DRI screen */
-
-   /**
-    * DRI drawable bound to this context for drawing.
-    */
-	__DRIdrawablePrivate *drawable;
 
-   /**
-    * DRI drawable bound to this context for reading.
-    */
-	__DRIdrawablePrivate *readable;
-
-	drm_context_t hwContext;
-	drm_hw_lock_t *hwLock;
-	int fd;
-	int drmMinor;
+struct r100_state {
+	struct radeon_texture_state texture;
 };
 
 #define RADEON_CMD_BUF_SZ  (8*1024)
-
-struct radeon_store {
-	GLuint statenr;
-	GLuint primnr;
-	char cmd_buf[RADEON_CMD_BUF_SZ];
-	int cmd_used;
-	int elts_start;
-};
-
+#define R200_ELT_BUF_SZ  (8*1024)
 /* radeon_tcl.c
  */
-struct radeon_tcl_info {
+struct r100_tcl_info {
 	GLuint vertex_format;
 	GLuint hw_primitive;
 
@@ -529,30 +349,18 @@ struct radeon_tcl_info {
 	 */
 	GLvector4f ObjClean;
 
-	struct radeon_dma_region *aos_components[8];
-	GLuint nr_aos_components;
-
 	GLuint *Elts;
 
-	struct radeon_dma_region indexed_verts;
-	struct radeon_dma_region obj;
-	struct radeon_dma_region rgba;
-	struct radeon_dma_region spec;
-	struct radeon_dma_region fog;
-	struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS];
-	struct radeon_dma_region norm;
+        int elt_cmd_offset;
+	int elt_cmd_start;
+        int elt_used;
 };
 
 /* radeon_swtcl.c
  */
-struct radeon_swtcl_info {
-	GLuint RenderIndex;
-	GLuint vertex_size;
+struct r100_swtcl_info {
 	GLuint vertex_format;
 
-	struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
-	GLuint vertex_attr_count;
-
 	GLubyte *verts;
 
 	/* Fallback rasterization functions
@@ -561,10 +369,6 @@ struct radeon_swtcl_info {
 	radeon_line_func draw_line;
 	radeon_tri_func draw_tri;
 
-	GLuint hw_primitive;
-	GLenum render_primitive;
-	GLuint numverts;
-
    /**
     * Offset of the 4UB color data within a hardware (swtcl) vertex.
     */
@@ -576,22 +380,9 @@ struct radeon_swtcl_info {
 	GLuint specoffset;
 
 	GLboolean needproj;
-
-	struct radeon_dma_region indexed_verts;
 };
 
-struct radeon_ioctl {
-	GLuint vertex_offset;
-	GLuint vertex_size;
-};
 
-#define RADEON_MAX_PRIMS 64
-
-struct radeon_prim {
-	GLuint start;
-	GLuint end;
-	GLuint prim;
-};
 
 /* A maximum total of 20 elements per vertex:  3 floats for position, 3
  * floats for normal, 4 floats for color, 4 bytes for secondary color,
@@ -602,59 +393,18 @@ struct radeon_prim {
  */
 #define RADEON_MAX_VERTEX_SIZE 20
 
-struct radeon_context {
-	GLcontext *glCtx;	/* Mesa context */
+struct r100_context {
+        struct radeon_context radeon;
 
 	/* Driver and hardware state management
 	 */
-	struct radeon_hw_state hw;
-	struct radeon_state state;
-
-	/* Texture object bookkeeping
-	 */
-	unsigned nr_heaps;
-	driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
-	driTextureObject swapped;
-	int texture_depth;
-	float initialMaxAnisotropy;
-
-	/* Rasterization and vertex state:
-	 */
-	GLuint TclFallback;
-	GLuint Fallback;
-	GLuint NewGLState;
-	 DECLARE_RENDERINPUTS(tnl_index_bitset);	/* index of bits for last tnl_install_attrs */
+	struct r100_hw_state hw;
+	struct r100_state state;
 
 	/* Vertex buffers
 	 */
 	struct radeon_ioctl ioctl;
-	struct radeon_dma dma;
 	struct radeon_store store;
-	/* A full state emit as of the first state emit in the main store, in case
-	 * the context is lost.
-	 */
-	struct radeon_store backup_store;
-
-	/* Page flipping
-	 */
-	GLuint doPageFlip;
-
-	/* Busy waiting
-	 */
-	GLuint do_usleeps;
-	GLuint do_irqs;
-	GLuint irqsEmitted;
-	drm_radeon_irq_wait_t iw;
-
-	/* Drawable, cliprect and scissor information
-	 */
-	GLuint numClipRects;	/* Cliprects for the draw buffer */
-	drm_clip_rect_t *pClipRects;
-	unsigned int lastStamp;
-	GLboolean lost_context;
-	GLboolean save_on_next_emit;
-	radeonScreenPtr radeonScreen;	/* Screen private DRI data */
-	drm_radeon_sarea_t *sarea;	/* Private SAREA data */
 
 	/* TCL stuff
 	 */
@@ -667,29 +417,13 @@ struct radeon_context {
 	GLmatrix tmpmat[RADEON_MAX_TEXTURE_UNITS];
 	GLuint last_ReallyEnabled;
 
-	/* VBI
-	 */
-	int64_t swap_ust;
-	int64_t swap_missed_ust;
-
-	GLuint swap_count;
-	GLuint swap_missed_count;
-
 	/* radeon_tcl.c
 	 */
-	struct radeon_tcl_info tcl;
+	struct r100_tcl_info tcl;
 
 	/* radeon_swtcl.c
 	 */
-	struct radeon_swtcl_info swtcl;
-
-	/* Mirrors of some DRI state
-	 */
-	struct radeon_dri_mirror dri;
-
-	/* Configuration cache
-	 */
-	driOptionCache optionCache;
+	struct r100_swtcl_info swtcl;
 
 	GLboolean using_hyperz;
 	GLboolean texmicrotile;
@@ -703,61 +437,19 @@ struct radeon_context {
 	GLuint c_textureSwaps;
 	GLuint c_textureBytes;
 	GLuint c_vertexBuffers;
+
 };
 
-#define RADEON_CONTEXT(ctx)		((radeonContextPtr)(ctx->DriverCtx))
-
-static INLINE GLuint radeonPackColor(GLuint cpp,
-                                     GLubyte r, GLubyte g,
-                                     GLubyte b, GLubyte a)
-{
-	switch (cpp) {
-	case 2:
-		return PACK_COLOR_565(r, g, b);
-	case 4:
-		return PACK_COLOR_8888(a, r, g, b);
-	default:
-		return 0;
-	}
-}
+
+#define R100_CONTEXT(ctx)		((r100ContextPtr)(ctx->DriverCtx))
+
 
 #define RADEON_OLD_PACKETS 1
 
-extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv);
-extern GLboolean radeonCreateContext(const __GLcontextModes * glVisual,
-				     __DRIcontextPrivate * driContextPriv,
-				     void *sharedContextPrivate);
-extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
-extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
-				int x, int y, int w, int h);
-extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
-				   __DRIdrawablePrivate * driDrawPriv,
-				   __DRIdrawablePrivate * driReadPriv);
-extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
-
-/* ================================================================
- * Debugging:
- */
-#define DO_DEBUG		1
-
-#if DO_DEBUG
-extern int RADEON_DEBUG;
-#else
-#define RADEON_DEBUG		0
-#endif
-
-#define DEBUG_TEXTURE	0x0001
-#define DEBUG_STATE	0x0002
-#define DEBUG_IOCTL	0x0004
-#define DEBUG_PRIMS	0x0008
-#define DEBUG_VERTS	0x0010
-#define DEBUG_FALLBACKS	0x0020
-#define DEBUG_VFMT	0x0040
-#define DEBUG_CODEGEN	0x0080
-#define DEBUG_VERBOSE	0x0100
-#define DEBUG_DRI       0x0200
-#define DEBUG_DMA       0x0400
-#define DEBUG_SANITY    0x0800
-#define DEBUG_SYNC      0x1000
+extern GLboolean r100CreateContext( const __GLcontextModes *glVisual,
+				    __DRIcontextPrivate *driContextPriv,
+				    void *sharedContextPrivate);
+  
+
 
 #endif				/* __RADEON_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_drm.h b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h
new file mode 100644
index 00000000000..ab4eca31a3c
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h
@@ -0,0 +1,246 @@
+/* 
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Aapo Tahkola <aet@rasterburn.org>
+ *      Nicolai Haehnle <prefect_@gmx.net>
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_CS_H
+#define RADEON_CS_H
+
+#include <stdint.h>
+#include <string.h>
+#include "drm.h"
+#include "radeon_drm.h"
+
+struct radeon_cs_reloc {
+    struct radeon_bo    *bo;
+    uint32_t            read_domain;
+    uint32_t            write_domain;
+    uint32_t            flags;
+};
+
+
+#define RADEON_CS_SPACE_OK 0
+#define RADEON_CS_SPACE_OP_TO_BIG 1
+#define RADEON_CS_SPACE_FLUSH 2
+
+struct radeon_cs_space_check {
+    struct radeon_bo *bo;
+    uint32_t read_domains;
+    uint32_t write_domain;
+    uint32_t new_accounted;
+};
+
+#define MAX_SPACE_BOS (32)
+
+struct radeon_cs_manager;
+
+struct radeon_cs {
+    struct radeon_cs_manager    *csm;
+    void                        *relocs;
+    uint32_t                    *packets;
+    unsigned                    crelocs;
+    unsigned                    relocs_total_size;
+    unsigned                    cdw;
+    unsigned                    ndw;
+    int                         section;
+    unsigned                    section_ndw;
+    unsigned                    section_cdw;
+    const char                  *section_file;
+    const char                  *section_func;
+    int                         section_line;
+    struct radeon_cs_space_check bos[MAX_SPACE_BOS];
+    int                         bo_count;
+    void                        (*space_flush_fn)(void *);
+    void                        *space_flush_data;
+};
+
+/* cs functions */
+struct radeon_cs_funcs {
+    struct radeon_cs *(*cs_create)(struct radeon_cs_manager *csm,
+                                   uint32_t ndw);
+    int (*cs_write_reloc)(struct radeon_cs *cs,
+                          struct radeon_bo *bo,
+                          uint32_t read_domain,
+                          uint32_t write_domain,
+                          uint32_t flags);
+    int (*cs_begin)(struct radeon_cs *cs,
+                    uint32_t ndw,
+                    const char *file,
+                    const char *func,
+                    int line);
+    int (*cs_end)(struct radeon_cs *cs,
+                  const char *file,
+                  const char *func,
+                  int line);
+    int (*cs_emit)(struct radeon_cs *cs);
+    int (*cs_destroy)(struct radeon_cs *cs);
+    int (*cs_erase)(struct radeon_cs *cs);
+    int (*cs_need_flush)(struct radeon_cs *cs);
+    void (*cs_print)(struct radeon_cs *cs, FILE *file);
+};
+
+struct radeon_cs_manager {
+    struct radeon_cs_funcs  *funcs;
+    int                     fd;
+    int32_t vram_limit, gart_limit;
+    int32_t vram_write_used, gart_write_used;
+    int32_t read_used;
+};
+
+static inline struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm,
+                                                 uint32_t ndw)
+{
+    return csm->funcs->cs_create(csm, ndw);
+}
+
+static inline int radeon_cs_write_reloc(struct radeon_cs *cs,
+                                        struct radeon_bo *bo,
+                                        uint32_t read_domain,
+                                        uint32_t write_domain,
+                                        uint32_t flags)
+{
+    return cs->csm->funcs->cs_write_reloc(cs,
+                                          bo,
+                                          read_domain,
+                                          write_domain,
+                                          flags);
+}
+
+static inline int radeon_cs_begin(struct radeon_cs *cs,
+                                  uint32_t ndw,
+                                  const char *file,
+                                  const char *func,
+                                  int line)
+{
+    return cs->csm->funcs->cs_begin(cs, ndw, file, func, line);
+}
+
+static inline int radeon_cs_end(struct radeon_cs *cs,
+                                const char *file,
+                                const char *func,
+                                int line)
+{
+    return cs->csm->funcs->cs_end(cs, file, func, line);
+}
+
+static inline int radeon_cs_emit(struct radeon_cs *cs)
+{
+    return cs->csm->funcs->cs_emit(cs);
+}
+
+static inline int radeon_cs_destroy(struct radeon_cs *cs)
+{
+    return cs->csm->funcs->cs_destroy(cs);
+}
+
+static inline int radeon_cs_erase(struct radeon_cs *cs)
+{
+    return cs->csm->funcs->cs_erase(cs);
+}
+
+static inline int radeon_cs_need_flush(struct radeon_cs *cs)
+{
+    return cs->csm->funcs->cs_need_flush(cs);
+}
+
+static inline void radeon_cs_print(struct radeon_cs *cs, FILE *file)
+{
+    cs->csm->funcs->cs_print(cs, file);
+}
+
+static inline void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit)
+{
+    
+    if (domain == RADEON_GEM_DOMAIN_VRAM)
+	cs->csm->vram_limit = limit;
+    else
+	cs->csm->gart_limit = limit;
+}
+
+static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword)
+{
+    cs->packets[cs->cdw++] = dword;
+    if (cs->section) {
+        cs->section_cdw++;
+    }
+}
+
+static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword)
+{
+
+    memcpy(cs->packets + cs->cdw, &qword, sizeof(qword));
+    cs->cdw+=2;
+    if (cs->section) {
+        cs->section_cdw+=2;
+    }
+}
+
+static inline void radeon_cs_write_table(struct radeon_cs *cs, void *data, uint32_t size)
+{
+    memcpy(cs->packets + cs->cdw, data, size * 4);
+    cs->cdw += size;
+    if (cs->section) {
+	    cs->section_cdw += size;
+    }
+}
+
+static inline void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data)
+{
+    cs->space_flush_fn = fn;
+    cs->space_flush_data = data;
+}
+
+
+/*
+ * add a persistent BO to the list
+ * a persistent BO is one that will be referenced across flushes,
+ * i.e. colorbuffer, textures etc.
+ * They get reset when a new "operation" happens, where an operation
+ * is a state emission with a color/textures etc followed by a bunch of vertices.
+ */
+void radeon_cs_space_add_persistent_bo(struct radeon_cs *cs,
+				       struct radeon_bo *bo,
+				       uint32_t read_domains,
+				       uint32_t write_domain);
+
+/* reset the persistent BO list */
+void radeon_cs_space_reset_bos(struct radeon_cs *cs);
+
+/* do a space check with the current persistent BO list */
+int radeon_cs_space_check(struct radeon_cs *cs);
+
+/* do a space check with the current persistent BO list and a temporary BO
+ * a temporary BO is like a DMA buffer, which  gets flushed with the
+ * command buffer */
+int radeon_cs_space_check_with_bo(struct radeon_cs *cs,
+				  struct radeon_bo *bo,
+				  uint32_t read_domains,
+				  uint32_t write_domain);
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
new file mode 100644
index 00000000000..f1addb299e2
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
@@ -0,0 +1,409 @@
+/* 
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Aapo Tahkola <aet@rasterburn.org>
+ *      Nicolai Haehnle <prefect_@gmx.net>
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#include <errno.h>
+
+#include "radeon_bocs_wrapper.h"
+#include "radeon_common.h"
+
+struct cs_manager_legacy {
+    struct radeon_cs_manager    base;
+    struct radeon_context       *ctx;
+    /* hack for scratch stuff */
+    uint32_t                    pending_age;
+    uint32_t                    pending_count;
+
+
+};
+
+struct cs_reloc_legacy {
+    struct radeon_cs_reloc  base;
+    uint32_t                cindices;
+    uint32_t                *indices;
+};
+
+
+static struct radeon_cs *cs_create(struct radeon_cs_manager *csm,
+                                   uint32_t ndw)
+{
+    struct radeon_cs *cs;
+
+    cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
+    if (cs == NULL) {
+        return NULL;
+    }
+    cs->csm = csm;
+    cs->ndw = (ndw + 0x3FF) & (~0x3FF);
+    cs->packets = (uint32_t*)malloc(4*cs->ndw);
+    if (cs->packets == NULL) {
+        free(cs);
+        return NULL;
+    }
+    cs->relocs_total_size = 0;
+    return cs;
+}
+
+static int cs_write_reloc(struct radeon_cs *cs,
+                          struct radeon_bo *bo,
+                          uint32_t read_domain,
+                          uint32_t write_domain,
+                          uint32_t flags)
+{
+    struct cs_reloc_legacy *relocs;
+    int i;
+
+    relocs = (struct cs_reloc_legacy *)cs->relocs;
+    /* check domains */
+    if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
+        /* in one CS a bo can only be in read or write domain but not
+         * in read & write domain at the same sime
+         */
+        return -EINVAL;
+    }
+    if (read_domain == RADEON_GEM_DOMAIN_CPU) {
+        return -EINVAL;
+    }
+    if (write_domain == RADEON_GEM_DOMAIN_CPU) {
+        return -EINVAL;
+    }
+    /* check if bo is already referenced */
+    for(i = 0; i < cs->crelocs; i++) {
+        uint32_t *indices;
+
+        if (relocs[i].base.bo->handle == bo->handle) {
+            /* Check domains must be in read or write. As we check already
+             * checked that in argument one of the read or write domain was
+             * set we only need to check that if previous reloc as the read
+             * domain set then the read_domain should also be set for this
+             * new relocation.
+             */
+            if (relocs[i].base.read_domain && !read_domain) {
+                return -EINVAL;
+            }
+            if (relocs[i].base.write_domain && !write_domain) {
+                return -EINVAL;
+            }
+            relocs[i].base.read_domain |= read_domain;
+            relocs[i].base.write_domain |= write_domain;
+            /* save indice */
+            relocs[i].cindices++;
+            indices = (uint32_t*)realloc(relocs[i].indices,
+                                         relocs[i].cindices * 4);
+            if (indices == NULL) {
+                relocs[i].cindices -= 1;
+                return -ENOMEM;
+            }
+            relocs[i].indices = indices;
+            relocs[i].indices[relocs[i].cindices - 1] = cs->cdw - 1;
+            return 0;
+        }
+    }
+    /* add bo to reloc */
+    relocs = (struct cs_reloc_legacy*)
+             realloc(cs->relocs,
+                     sizeof(struct cs_reloc_legacy) * (cs->crelocs + 1));
+    if (relocs == NULL) {
+        return -ENOMEM;
+    }
+    cs->relocs = relocs;
+    relocs[cs->crelocs].base.bo = bo;
+    relocs[cs->crelocs].base.read_domain = read_domain;
+    relocs[cs->crelocs].base.write_domain = write_domain;
+    relocs[cs->crelocs].base.flags = flags;
+    relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
+    if (relocs[cs->crelocs].indices == NULL) {
+        return -ENOMEM;
+    }
+    relocs[cs->crelocs].indices[0] = cs->cdw - 1;
+    relocs[cs->crelocs].cindices = 1;
+    cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
+    cs->crelocs++;
+    radeon_bo_ref(bo);
+    return 0;
+}
+
+static int cs_begin(struct radeon_cs *cs,
+                    uint32_t ndw,
+                    const char *file,
+                    const char *func,
+                    int line)
+{
+    if (cs->section) {
+        fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
+                cs->section_file, cs->section_func, cs->section_line);
+        fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
+                file, func, line);
+        return -EPIPE;
+    }
+    cs->section = 1;
+    cs->section_ndw = ndw;
+    cs->section_cdw = 0;
+    cs->section_file = file;
+    cs->section_func = func;
+    cs->section_line = line;
+
+
+    if (cs->cdw + ndw > cs->ndw) {
+        uint32_t tmp, *ptr;
+	int num = (ndw > 0x3FF) ? ndw : 0x3FF;
+
+        tmp = (cs->cdw + 1 + num) & (~num);
+        ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
+        if (ptr == NULL) {
+            return -ENOMEM;
+        }
+        cs->packets = ptr;
+        cs->ndw = tmp;
+    }
+
+    return 0;
+}
+
+static int cs_end(struct radeon_cs *cs,
+                  const char *file,
+                  const char *func,
+                  int line)
+
+{
+    if (!cs->section) {
+        fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
+                file, func, line);
+        return -EPIPE;
+    }
+    cs->section = 0;
+    if (cs->section_ndw != cs->section_cdw) {
+        fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
+                cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
+        fprintf(stderr, "CS section end at (%s,%s,%d)\n",
+                file, func, line);
+        return -EPIPE;
+    }
+    return 0;
+}
+
+static int cs_process_relocs(struct radeon_cs *cs)
+{
+    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
+    struct cs_reloc_legacy *relocs;
+    int i, j, r;
+
+    csm = (struct cs_manager_legacy*)cs->csm;
+    relocs = (struct cs_reloc_legacy *)cs->relocs;
+restart:
+    for (i = 0; i < cs->crelocs; i++) 
+    {
+        for (j = 0; j < relocs[i].cindices; j++) 
+        {
+            uint32_t soffset, eoffset;
+
+            r = radeon_bo_legacy_validate(relocs[i].base.bo,
+                                           &soffset, &eoffset);
+	        if (r == -EAGAIN)
+            {
+	             goto restart;
+            }
+            if (r) 
+            {
+                fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
+                        relocs[i].base.bo, soffset, eoffset);
+                return r;
+            }
+            cs->packets[relocs[i].indices[j]] += soffset;
+            if (cs->packets[relocs[i].indices[j]] >= eoffset) 
+            {
+	      /*                radeon_bo_debug(relocs[i].base.bo, 12); */
+                fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
+                        relocs[i].base.bo, soffset, eoffset);
+                fprintf(stderr, "above end: %p 0x%08X 0x%08X\n",
+                        relocs[i].base.bo,
+                        cs->packets[relocs[i].indices[j]],
+                        eoffset);
+                exit(0);
+                return -EINVAL;
+            }
+        }
+    }
+    return 0;
+}
+
+static int cs_set_age(struct radeon_cs *cs)
+{
+    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
+    struct cs_reloc_legacy *relocs;
+    int i;
+
+    relocs = (struct cs_reloc_legacy *)cs->relocs;
+    for (i = 0; i < cs->crelocs; i++) {
+        radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
+        radeon_bo_unref(relocs[i].base.bo);
+    }
+    return 0;
+}
+
+static int cs_emit(struct radeon_cs *cs)
+{
+    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
+    drm_radeon_cmd_buffer_t cmd;
+    drm_r300_cmd_header_t age;
+    uint64_t ull;
+    int r;
+
+    csm->ctx->vtbl.emit_cs_header(cs, csm->ctx);
+
+    /* append buffer age */
+    if ( IS_R300_CLASS(csm->ctx->radeonScreen) )
+    { 
+      age.scratch.cmd_type = R300_CMD_SCRATCH;
+      /* Scratch register 2 corresponds to what radeonGetAge polls */
+      csm->pending_age = 0;
+      csm->pending_count = 1;
+      ull = (uint64_t) (intptr_t) &csm->pending_age;
+      age.scratch.reg = 2;
+      age.scratch.n_bufs = 1;
+      age.scratch.flags = 0;
+      radeon_cs_write_dword(cs, age.u);
+      radeon_cs_write_qword(cs, ull);
+      radeon_cs_write_dword(cs, 0);
+    }
+
+    r = cs_process_relocs(cs);
+    if (r) {
+        return 0;
+    }
+
+    cmd.buf = (char *)cs->packets;
+    cmd.bufsz = cs->cdw * 4;
+    if (csm->ctx->state.scissor.enabled) {
+        cmd.nbox = csm->ctx->state.scissor.numClipRects;
+        cmd.boxes = (drm_clip_rect_t *) csm->ctx->state.scissor.pClipRects;
+    } else {
+        cmd.nbox = csm->ctx->numClipRects;
+        cmd.boxes = (drm_clip_rect_t *) csm->ctx->pClipRects;
+    }
+
+    //dump_cmdbuf(cs);
+
+    r = drmCommandWrite(cs->csm->fd, DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
+    if (r) {
+        return r;
+    }
+    if ((!IS_R300_CLASS(csm->ctx->radeonScreen)) &&
+        (!IS_R600_CLASS(csm->ctx->radeonScreen))) { /* +r6/r7 : No irq for r6/r7 yet. */
+	drm_radeon_irq_emit_t emit_cmd;
+	emit_cmd.irq_seq = (int*)&csm->pending_age;
+	r = drmCommandWrite(cs->csm->fd, DRM_RADEON_IRQ_EMIT, &emit_cmd, sizeof(emit_cmd));
+	if (r) {
+		return r;
+	}
+    }
+    cs_set_age(cs);
+
+    cs->csm->read_used = 0;
+    cs->csm->vram_write_used = 0;
+    cs->csm->gart_write_used = 0;
+    return 0;
+}
+
+static void inline cs_free_reloc(void *relocs_p, int crelocs)
+{
+    struct cs_reloc_legacy *relocs = relocs_p;
+    int i;
+    if (!relocs_p)
+      return;
+    for (i = 0; i < crelocs; i++)
+      free(relocs[i].indices);
+}
+
+static int cs_destroy(struct radeon_cs *cs)
+{
+    cs_free_reloc(cs->relocs, cs->crelocs);
+    free(cs->relocs);
+    free(cs->packets);
+    free(cs);
+    return 0;
+}
+
+static int cs_erase(struct radeon_cs *cs)
+{
+    cs_free_reloc(cs->relocs, cs->crelocs);
+    free(cs->relocs);
+    cs->relocs_total_size = 0;
+    cs->relocs = NULL;
+    cs->crelocs = 0;
+    cs->cdw = 0;
+    cs->section = 0;
+    return 0;
+}
+
+static int cs_need_flush(struct radeon_cs *cs)
+{
+    /* this function used to flush when the BO usage got to
+     * a certain size, now the higher levels handle this better */
+    return 0;
+}
+
+static void cs_print(struct radeon_cs *cs, FILE *file)
+{
+}
+
+static struct radeon_cs_funcs  radeon_cs_legacy_funcs = {
+    cs_create,
+    cs_write_reloc,
+    cs_begin,
+    cs_end,
+    cs_emit,
+    cs_destroy,
+    cs_erase,
+    cs_need_flush,
+    cs_print,
+};
+
+struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx)
+{
+    struct cs_manager_legacy *csm;
+
+    csm = (struct cs_manager_legacy*)
+          calloc(1, sizeof(struct cs_manager_legacy));
+    if (csm == NULL) {
+        return NULL;
+    }
+    csm->base.funcs = &radeon_cs_legacy_funcs;
+    csm->base.fd = ctx->dri.fd;
+    csm->ctx = ctx;
+    csm->pending_age = 1;
+    return (struct radeon_cs_manager*)csm;
+}
+
+void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm)
+{
+    free(csm);
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h
new file mode 100644
index 00000000000..cafbc9e576e
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h
@@ -0,0 +1,40 @@
+/* 
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Aapo Tahkola <aet@rasterburn.org>
+ *      Nicolai Haehnle <prefect_@gmx.net>
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_CS_LEGACY_H
+#define RADEON_CS_LEGACY_H
+
+struct radeon_context;
+
+struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx);
+void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm);
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c b/src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c
new file mode 100644
index 00000000000..89cbbb5a6b9
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c
@@ -0,0 +1,234 @@
+/* 
+ * Copyright © 2009 Red Hat Inc.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include "radeon_bocs_wrapper.h"
+
+struct rad_sizes {
+    int32_t op_read;
+    int32_t op_gart_write;
+    int32_t op_vram_write;
+};
+
+static inline int radeon_cs_setup_bo(struct radeon_cs_space_check *sc, struct rad_sizes *sizes)
+{
+    uint32_t read_domains, write_domain;
+    struct radeon_bo *bo;
+
+    bo = sc->bo;
+    sc->new_accounted = 0;
+    read_domains = sc->read_domains;
+    write_domain = sc->write_domain;
+
+    /* legacy needs a static check */
+    if (radeon_bo_is_static(bo)) {
+	bo->space_accounted = sc->new_accounted = (read_domains << 16) | write_domain;
+	return 0;
+    }
+
+    /* already accounted this bo */
+    if (write_domain && (write_domain == bo->space_accounted)) {
+	sc->new_accounted = bo->space_accounted;
+	return 0;
+    }
+    if (read_domains && ((read_domains << 16) == bo->space_accounted)) {
+	sc->new_accounted = bo->space_accounted;
+	return 0;
+    }
+
+    if (bo->space_accounted == 0) {
+	if (write_domain == RADEON_GEM_DOMAIN_VRAM)
+	    sizes->op_vram_write += bo->size;
+	else if (write_domain == RADEON_GEM_DOMAIN_GTT)
+	  sizes->op_gart_write += bo->size;
+	else
+	    sizes->op_read += bo->size;
+	sc->new_accounted = (read_domains << 16) | write_domain;
+    } else {
+	uint16_t old_read, old_write;
+	
+	old_read = bo->space_accounted >> 16;
+	old_write = bo->space_accounted & 0xffff;
+	
+	if (write_domain && (old_read & write_domain)) {
+	    sc->new_accounted = write_domain;
+	    /* moving from read to a write domain */
+	    if (write_domain == RADEON_GEM_DOMAIN_VRAM) {
+		sizes->op_read -= bo->size;
+		sizes->op_vram_write += bo->size;
+	    } else if (write_domain == RADEON_GEM_DOMAIN_GTT) {
+		sizes->op_read -= bo->size;
+		sizes->op_gart_write += bo->size;
+	    }
+	} else if (read_domains & old_write) {
+	    sc->new_accounted = bo->space_accounted & 0xffff;
+	} else {
+	    /* rewrite the domains */
+	    if (write_domain != old_write)
+		fprintf(stderr,"WRITE DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, write_domain, old_write);
+	    if (read_domains != old_read)
+		fprintf(stderr,"READ DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, read_domains, old_read);
+	    return RADEON_CS_SPACE_FLUSH;
+	}
+    }
+    return 0;
+}
+
+static int radeon_cs_do_space_check(struct radeon_cs *cs, struct radeon_cs_space_check *new_tmp)
+{
+    struct radeon_cs_manager *csm = cs->csm;
+    int i;
+    struct radeon_bo *bo;
+    struct rad_sizes sizes;
+    int ret;
+
+    /* check the totals for this operation */
+
+    if (cs->bo_count == 0 && !new_tmp)
+	return 0;
+
+    memset(&sizes, 0, sizeof(struct rad_sizes));
+
+    /* prepare */
+    for (i = 0; i < cs->bo_count; i++) {
+	ret = radeon_cs_setup_bo(&cs->bos[i], &sizes);
+	if (ret)
+	    return ret;
+    }
+
+    if (new_tmp) {
+	ret = radeon_cs_setup_bo(new_tmp, &sizes);
+	if (ret)
+	    return ret;
+    }
+	
+    if (sizes.op_read < 0)
+	    sizes.op_read = 0;
+
+    /* check sizes - operation first */
+    if ((sizes.op_read + sizes.op_gart_write > csm->gart_limit) ||
+	(sizes.op_vram_write > csm->vram_limit)) {
+	    return RADEON_CS_SPACE_OP_TO_BIG;
+    }
+    
+    if (((csm->vram_write_used + sizes.op_vram_write) > csm->vram_limit) ||
+	((csm->read_used + csm->gart_write_used + sizes.op_gart_write + sizes.op_read) > csm->gart_limit)) {
+	    return RADEON_CS_SPACE_FLUSH;
+    }
+    
+    csm->gart_write_used += sizes.op_gart_write;
+    csm->vram_write_used += sizes.op_vram_write;
+    csm->read_used += sizes.op_read;
+    /* commit */
+    for (i = 0; i < cs->bo_count; i++) {
+	    bo = cs->bos[i].bo;
+	    bo->space_accounted = cs->bos[i].new_accounted;
+    }
+    if (new_tmp)
+	new_tmp->bo->space_accounted = new_tmp->new_accounted;
+    
+    return RADEON_CS_SPACE_OK;
+}
+
+void radeon_cs_space_add_persistent_bo(struct radeon_cs *cs, struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain)
+{
+    int i;
+    for (i = 0; i < cs->bo_count; i++) {
+	if (cs->bos[i].bo == bo &&
+	    cs->bos[i].read_domains == read_domains &&
+	    cs->bos[i].write_domain == write_domain)
+	    return;
+    }
+    radeon_bo_ref(bo);
+    i = cs->bo_count;
+    cs->bos[i].bo = bo;
+    cs->bos[i].read_domains = read_domains;
+    cs->bos[i].write_domain = write_domain;
+    cs->bos[i].new_accounted = 0;
+    cs->bo_count++;
+
+    assert(cs->bo_count < MAX_SPACE_BOS);
+}
+
+static int radeon_cs_check_space_internal(struct radeon_cs *cs, struct radeon_cs_space_check *tmp_bo)
+{
+    int ret;
+    int flushed = 0;
+
+again:
+    ret = radeon_cs_do_space_check(cs, tmp_bo);
+    if (ret == RADEON_CS_SPACE_OP_TO_BIG)
+	return -1;
+    if (ret == RADEON_CS_SPACE_FLUSH) {
+	(*cs->space_flush_fn)(cs->space_flush_data);
+	if (flushed)
+	    return -1;
+	flushed = 1;
+	goto again;
+    }
+    return 0;
+}
+
+int radeon_cs_space_check_with_bo(struct radeon_cs *cs,
+				  struct radeon_bo *bo,
+				  uint32_t read_domains, uint32_t write_domain)
+{									
+    struct radeon_cs_space_check temp_bo;
+    int ret = 0;
+
+    if (bo) {
+	temp_bo.bo = bo;
+	temp_bo.read_domains = read_domains;
+	temp_bo.write_domain = write_domain;
+	temp_bo.new_accounted = 0;
+    }
+
+    ret = radeon_cs_check_space_internal(cs, bo ? &temp_bo : NULL);
+    return ret;
+}
+
+int radeon_cs_space_check(struct radeon_cs *cs)
+{
+    return radeon_cs_check_space_internal(cs, NULL);
+}
+
+void radeon_cs_space_reset_bos(struct radeon_cs *cs)
+{
+    int i;
+    for (i = 0; i < cs->bo_count; i++) {
+	radeon_bo_unref(cs->bos[i].bo);
+	cs->bos[i].bo = NULL;
+	cs->bos[i].read_domains = 0;
+	cs->bos[i].write_domain = 0;
+	cs->bos[i].new_accounted = 0;
+    }
+    cs->bo_count = 0;
+}
+
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_debug.c b/src/mesa/drivers/dri/radeon/radeon_debug.c
new file mode 100644
index 00000000000..3b6f0038037
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_debug.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright © 2009 Pauli Nieminen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Pauli Nieminen <suokkos@gmail.com>
+ */
+
+#include "utils.h"
+
+#include "radeon_debug.h"
+#include "radeon_common_context.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+
+static const struct dri_debug_control debug_control[] = {
+	{"fall", RADEON_FALLBACKS},
+	{"tex", RADEON_TEXTURE},
+	{"ioctl", RADEON_IOCTL},
+	{"verts", RADEON_RENDER},
+	{"render", RADEON_RENDER},
+	{"swrender", RADEON_SWRENDER},
+	{"state", RADEON_STATE},
+	{"shader", RADEON_SHADER},
+	{"vfmt", RADEON_VFMT},
+	{"vtxf", RADEON_VFMT},
+	{"dri", RADEON_DRI},
+	{"dma", RADEON_DMA},
+	{"sanity", RADEON_SANITY},
+	{"sync", RADEON_SYNC},
+	{"pixel", RADEON_PIXEL},
+	{"mem", RADEON_MEMORY},
+	{"cs", RADEON_CS},
+	{"allmsg", ~RADEON_SYNC}, /* avoid the term "sync" because the parser uses strstr */
+	{NULL, 0}
+};
+
+radeon_debug_type_t radeon_enabled_debug_types;
+
+void radeon_init_debug(void)
+{
+	radeon_enabled_debug_types = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
+
+	radeon_enabled_debug_types |= RADEON_GENERAL;
+}
+
+void _radeon_debug_add_indent(void)
+{
+	GET_CURRENT_CONTEXT(ctx);
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	const size_t length = sizeof(radeon->debug.indent)
+		/ sizeof(radeon->debug.indent[0]);
+	if (radeon->debug.indent_depth < length - 1) {
+		radeon->debug.indent[radeon->debug.indent_depth] = '\t';
+		++radeon->debug.indent_depth;
+	};
+}
+
+void _radeon_debug_remove_indent(void)
+{
+	GET_CURRENT_CONTEXT(ctx);
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	if (radeon->debug.indent_depth > 0) {
+		radeon->debug.indent[radeon->debug.indent_depth] = '\0';
+		--radeon->debug.indent_depth;
+	}
+}
+
+void _radeon_print(const radeon_debug_type_t type,
+	   const radeon_debug_level_t level,
+	   const char* message,
+	   ...)
+{
+	GET_CURRENT_CONTEXT(ctx);
+	if (ctx) {
+		radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+		// FIXME: Make this multi thread safe
+		if (radeon->debug.indent_depth)
+			fprintf(stderr, "%s", radeon->debug.indent);
+	}
+	va_list values;
+	va_start( values, message );
+	vfprintf(stderr, message, values);
+	va_end( values );
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_debug.h b/src/mesa/drivers/dri/radeon/radeon_debug.h
new file mode 100644
index 00000000000..2a8302293b2
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_debug.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright © 2009 Pauli Nieminen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Pauli Nieminen <suokkos@gmail.com>
+ */
+
+#ifndef RADEON_DEBUG_H_INCLUDED
+#define RADEON_DEBUG_H_INCLUDED
+
+#include <stdlib.h>
+
+typedef enum radeon_debug_levels {
+	RADEON_CRITICAL  = 0, /* Only errors */
+	RADEON_IMPORTANT = 1, /* Important warnings and messages */
+	RADEON_NORMAL    = 2, /* Normal log messages usefull for debugging */
+	RADEON_VERBOSE   = 3, /* Extra details to debugging */
+	RADEON_TRACE     = 4  /* Log about everything that happens */
+} radeon_debug_level_t;
+
+/**
+ * Compile time option to change level of debugging compiled to dri driver.
+ * Selecting critical level is not recommended because perfromance gains are
+ * going to minimal but you will lose a lot of important warnings in case of
+ * errors.
+ */
+#ifndef RADEON_DEBUG_LEVEL
+#define RADEON_DEBUG_LEVEL RADEON_VERBOSE
+#endif
+
+typedef enum radeon_debug_types {
+	RADEON_TEXTURE   = 0x00001,
+	RADEON_STATE     = 0x00002,
+	RADEON_IOCTL     = 0x00004,
+	RADEON_RENDER    = 0x00008,
+	RADEON_SWRENDER  = 0x00010,
+	RADEON_FALLBACKS = 0x00020,
+	RADEON_VFMT      = 0x00040,
+	RADEON_SHADER    = 0x00080,
+	RADEON_CS        = 0x00100,
+	RADEON_DRI       = 0x00200,
+	RADEON_DMA       = 0x00400,
+	RADEON_SANITY    = 0x00800,
+	RADEON_SYNC      = 0x01000,
+	RADEON_PIXEL     = 0x02000,
+	RADEON_MEMORY    = 0x04000,
+	RADEON_VERTS     = 0x08000,
+	RADEON_GENERAL   = 0x10000   /* Used for errors and warnings */
+} radeon_debug_type_t;
+
+#define RADEON_MAX_INDENT 5
+
+struct radeon_debug {
+       size_t indent_depth;
+       char indent[RADEON_MAX_INDENT];
+};
+
+extern radeon_debug_type_t radeon_enabled_debug_types;
+
+/**
+ * Compabibility layer for old debug code
+ **/
+#define RADEON_DEBUG radeon_enabled_debug_types
+
+static inline int radeon_is_debug_enabled(const radeon_debug_type_t type,
+	   const radeon_debug_level_t level)
+{
+       return RADEON_DEBUG_LEVEL >= level
+		&& (type & radeon_enabled_debug_types);
+}
+/*
+ * define macro for gcc specific __attribute__ if using alternative compiler
+ */
+#ifndef __GNUC__
+#define  __attribute__(x)  /*empty*/
+#endif
+
+
+extern void _radeon_print(const radeon_debug_type_t type,
+	   const radeon_debug_level_t level,
+	   const char* message,
+	   ...)  __attribute__((format(printf,3,4)));
+/**
+ * Print out debug message if channel specified by type is enabled
+ * and compile time debugging level is at least as high as level parameter
+ */
+#define radeon_print(type, level, message, ...) do {		\
+	const radeon_debug_level_t _debug_level = (level);	\
+	const radeon_debug_type_t _debug_type = (type);		\
+	/* Compile out if level of message is too high */	\
+	if (radeon_is_debug_enabled(type, level)) {		\
+		_radeon_print(_debug_type, _debug_level,	\
+			(message), ## __VA_ARGS__);		\
+	}							\
+} while(0)
+
+/**
+ * printf style function for writing error messages.
+ */
+#define radeon_error(message, ...) do {				\
+	radeon_print(RADEON_GENERAL, RADEON_CRITICAL,		\
+		(message), ## __VA_ARGS__);			\
+} while(0)
+
+/**
+ * printf style function for writing warnings.
+ */
+#define radeon_warning(message, ...) do {			\
+	radeon_print(RADEON_GENERAL, RADEON_IMPORTANT,		\
+		(message), ## __VA_ARGS__);			\
+} while(0)
+
+extern void radeon_init_debug(void);
+extern void _radeon_debug_add_indent(void);
+extern void _radeon_debug_remove_indent(void);
+
+static inline void radeon_debug_add_indent(void)
+{
+       if (RADEON_DEBUG_LEVEL >= RADEON_VERBOSE) {
+	      _radeon_debug_add_indent();
+       }
+}
+static inline void radeon_debug_remove_indent(void)
+{
+       if (RADEON_DEBUG_LEVEL >= RADEON_VERBOSE) {
+	      _radeon_debug_remove_indent();
+       }
+}
+
+/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
+   I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble
+   with other compilers ... GLUE!
+*/
+#define WARN_ONCE(a, ...)      { \
+       static int warn##__LINE__=1; \
+       if(warn##__LINE__){ \
+               radeon_warning("*********************************WARN_ONCE*********************************\n"); \
+               radeon_warning("File %s function %s line %d\n", \
+                       __FILE__, __FUNCTION__, __LINE__); \
+               radeon_warning(  (a), ## __VA_ARGS__);\
+               radeon_warning("***************************************************************************\n"); \
+               warn##__LINE__=0;\
+               } \
+       }
+
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c
new file mode 100644
index 00000000000..c9a32c808be
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_dma.c
@@ -0,0 +1,476 @@
+/**************************************************************************
+
+Copyright (C) 2004 Nicolai Haehnle.
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+#include <errno.h>
+#include "radeon_common.h"
+#include "main/simple_list.h"
+
+#if defined(USE_X86_ASM)
+#define COPY_DWORDS( dst, src, nr )					\
+do {									\
+	int __tmp;							\
+	__asm__ __volatile__( "rep ; movsl"				\
+			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
+			      : "0" (nr),				\
+			        "D" ((long)dst),			\
+			        "S" ((long)src) );			\
+} while (0)
+#else
+#define COPY_DWORDS( dst, src, nr )		\
+do {						\
+   int j;					\
+   for ( j = 0 ; j < nr ; j++ )			\
+      dst[j] = ((int *)src)[j];			\
+   dst += nr;					\
+} while (0)
+#endif
+
+void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
+{
+	int i;
+
+	if (RADEON_DEBUG & RADEON_VERTS)
+		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+			__FUNCTION__, count, stride, (void *)out, (void *)data);
+
+	if (stride == 4)
+		COPY_DWORDS(out, data, count);
+	else
+		for (i = 0; i < count; i++) {
+			out[0] = *(int *)data;
+			out++;
+			data += stride;
+		}
+}
+
+void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
+{
+	int i;
+
+	if (RADEON_DEBUG & RADEON_VERTS)
+		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+			__FUNCTION__, count, stride, (void *)out, (void *)data);
+
+	if (stride == 8)
+		COPY_DWORDS(out, data, count * 2);
+	else
+		for (i = 0; i < count; i++) {
+			out[0] = *(int *)data;
+			out[1] = *(int *)(data + 4);
+			out += 2;
+			data += stride;
+		}
+}
+
+void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
+{
+	int i;
+
+	if (RADEON_DEBUG & RADEON_VERTS)
+		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+			__FUNCTION__, count, stride, (void *)out, (void *)data);
+
+	if (stride == 12) {
+		COPY_DWORDS(out, data, count * 3);
+    }
+	else
+		for (i = 0; i < count; i++) {
+			out[0] = *(int *)data;
+			out[1] = *(int *)(data + 4);
+			out[2] = *(int *)(data + 8);
+			out += 3;
+			data += stride;
+		}
+}
+
+void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
+{
+	int i;
+
+	if (RADEON_DEBUG & RADEON_VERTS)
+		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+			__FUNCTION__, count, stride, (void *)out, (void *)data);
+
+	if (stride == 16)
+		COPY_DWORDS(out, data, count * 4);
+	else
+		for (i = 0; i < count; i++) {
+			out[0] = *(int *)data;
+			out[1] = *(int *)(data + 4);
+			out[2] = *(int *)(data + 8);
+			out[3] = *(int *)(data + 12);
+			out += 4;
+			data += stride;
+		}
+}
+
+void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
+			 const GLvoid * data, int size, int stride, int count)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	uint32_t *out;
+
+	if (stride == 0) {
+		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
+		count = 1;
+		aos->stride = 0;
+	} else {
+		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
+		aos->stride = size;
+	}
+
+	aos->components = size;
+	aos->count = count;
+
+	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
+	switch (size) {
+	case 1: radeonEmitVec4(out, data, stride, count); break;
+	case 2: radeonEmitVec8(out, data, stride, count); break;
+	case 3: radeonEmitVec12(out, data, stride, count); break;
+	case 4: radeonEmitVec16(out, data, stride, count); break;
+	default:
+		assert(0);
+		break;
+	}
+}
+
+void radeon_init_dma(radeonContextPtr rmesa)
+{
+	make_empty_list(&rmesa->dma.free);
+	make_empty_list(&rmesa->dma.wait);
+	make_empty_list(&rmesa->dma.reserved);
+	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
+}
+
+void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
+{
+	struct radeon_dma_bo *dma_bo = NULL;
+	/* we set minimum sizes to at least requested size
+	   aligned to next 16 bytes. */
+	if (size > rmesa->dma.minimum_size)
+		rmesa->dma.minimum_size = (size + 15) & (~15);
+
+	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
+			__FUNCTION__, size, rmesa->dma.minimum_size);
+
+
+	/* unmap old reserved bo */
+	if (!is_empty_list(&rmesa->dma.reserved))
+		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
+
+	if (is_empty_list(&rmesa->dma.free)
+	      || last_elem(&rmesa->dma.free)->bo->size < size) {
+		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
+		assert(dma_bo);
+
+again_alloc:
+		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
+					    0, rmesa->dma.minimum_size, 4,
+					    RADEON_GEM_DOMAIN_GTT, 0);
+
+		if (!dma_bo->bo) {
+			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
+			goto again_alloc;
+		}
+		insert_at_head(&rmesa->dma.reserved, dma_bo);
+	} else {
+		/* We push and pop buffers from end of list so we can keep
+		   counter on unused buffers for later freeing them from
+		   begin of list */
+		dma_bo = last_elem(&rmesa->dma.free);
+		assert(dma_bo->bo->cref == 1);
+		remove_from_list(dma_bo);
+		insert_at_head(&rmesa->dma.reserved, dma_bo);
+	}
+
+	rmesa->dma.current_used = 0;
+	rmesa->dma.current_vertexptr = 0;
+	
+	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
+					  first_elem(&rmesa->dma.reserved)->bo,
+					  RADEON_GEM_DOMAIN_GTT, 0))
+		fprintf(stderr,"failure to revalidate BOs - badness\n");
+
+	if (is_empty_list(&rmesa->dma.reserved)) {
+        /* Cmd buff have been flushed in radeon_revalidate_bos */
+		goto again_alloc;
+	}
+
+	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
+}
+
+/* Allocates a region from rmesa->dma.current.  If there isn't enough
+ * space in current, grab a new buffer (and discard what was left of current)
+ */
+void radeonAllocDmaRegion(radeonContextPtr rmesa,
+			  struct radeon_bo **pbo, int *poffset,
+			  int bytes, int alignment)
+{
+	if (RADEON_DEBUG & RADEON_IOCTL)
+		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+
+	if (rmesa->dma.flush)
+		rmesa->dma.flush(rmesa->glCtx);
+
+	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
+
+	alignment--;
+	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
+
+	if (is_empty_list(&rmesa->dma.reserved)
+		|| rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
+		radeonRefillCurrentDmaRegion(rmesa, bytes);
+
+	*poffset = rmesa->dma.current_used;
+	*pbo = first_elem(&rmesa->dma.reserved)->bo;
+	radeon_bo_ref(*pbo);
+
+	/* Always align to at least 16 bytes */
+	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
+	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
+
+	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
+}
+
+void radeonFreeDmaRegions(radeonContextPtr rmesa)
+{
+	struct radeon_dma_bo *dma_bo;
+	struct radeon_dma_bo *temp;
+	if (RADEON_DEBUG & RADEON_DMA)
+		fprintf(stderr, "%s\n", __FUNCTION__);
+
+	foreach_s(dma_bo, temp, &rmesa->dma.free) {
+		remove_from_list(dma_bo);
+	        radeon_bo_unref(dma_bo->bo);
+		FREE(dma_bo);
+	}
+
+	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
+		remove_from_list(dma_bo);
+	        radeon_bo_unref(dma_bo->bo);
+		FREE(dma_bo);
+	}
+
+	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
+		remove_from_list(dma_bo);
+		radeon_bo_unmap(dma_bo->bo);
+	        radeon_bo_unref(dma_bo->bo);
+		FREE(dma_bo);
+	}
+}
+
+void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
+{
+	if (is_empty_list(&rmesa->dma.reserved))
+		return;
+
+	if (RADEON_DEBUG & RADEON_IOCTL)
+		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
+	rmesa->dma.current_used -= return_bytes;
+	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
+}
+
+static int radeon_bo_is_idle(struct radeon_bo* bo)
+{
+	uint32_t domain;
+	int ret = radeon_bo_is_busy(bo, &domain);
+	if (ret == -EINVAL) {
+		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
+			"This may cause small performance drop for you.\n");
+	}
+	return ret != -EBUSY;
+}
+
+void radeonReleaseDmaRegions(radeonContextPtr rmesa)
+{
+	struct radeon_dma_bo *dma_bo;
+	struct radeon_dma_bo *temp;
+	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
+	const int time = rmesa->dma.free.expire_counter;
+
+	if (RADEON_DEBUG & RADEON_DMA) {
+		size_t free = 0,
+		       wait = 0,
+		       reserved = 0;
+		foreach(dma_bo, &rmesa->dma.free)
+			++free;
+
+		foreach(dma_bo, &rmesa->dma.wait)
+			++wait;
+
+		foreach(dma_bo, &rmesa->dma.reserved)
+			++reserved;
+
+		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
+		      __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
+	}
+
+	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+		/* request updated cs processing information from kernel */
+		legacy_track_pending(rmesa->radeonScreen->bom, 0);
+	}
+	/* move waiting bos to free list.
+	   wait list provides gpu time to handle data before reuse */
+	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
+		if (dma_bo->expire_counter == time) {
+			WARN_ONCE("Leaking dma buffer object!\n");
+			radeon_bo_unref(dma_bo->bo);
+			remove_from_list(dma_bo);
+			FREE(dma_bo);
+			continue;
+		}
+		/* free objects that are too small to be used because of large request */
+		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
+		   radeon_bo_unref(dma_bo->bo);
+		   remove_from_list(dma_bo);
+		   FREE(dma_bo);
+		   continue;
+		}
+		if (!radeon_bo_is_idle(dma_bo->bo))
+			continue;
+		remove_from_list(dma_bo);
+		dma_bo->expire_counter = expire_at;
+		insert_at_tail(&rmesa->dma.free, dma_bo);
+	}
+
+	/* unmap the last dma region */
+	if (!is_empty_list(&rmesa->dma.reserved))
+		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
+	/* move reserved to wait list */
+	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
+		/* free objects that are too small to be used because of large request */
+		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
+		   radeon_bo_unref(dma_bo->bo);
+		   remove_from_list(dma_bo);
+		   FREE(dma_bo);
+		   continue;
+		}
+		remove_from_list(dma_bo);
+		dma_bo->expire_counter = expire_at;
+		insert_at_tail(&rmesa->dma.wait, dma_bo);
+	}
+
+	/* free bos that have been unused for some time */
+	foreach_s(dma_bo, temp, &rmesa->dma.free) {
+		if (dma_bo->expire_counter != time)
+			break;
+		remove_from_list(dma_bo);
+	        radeon_bo_unref(dma_bo->bo);
+		FREE(dma_bo);
+	}
+
+}
+
+
+/* Flush vertices in the current dma region.
+ */
+void rcommon_flush_last_swtcl_prim( GLcontext *ctx  )
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	struct radeon_dma *dma = &rmesa->dma;
+		
+
+	if (RADEON_DEBUG & RADEON_IOCTL)
+		fprintf(stderr, "%s\n", __FUNCTION__);
+	dma->flush = NULL;
+
+	if (!is_empty_list(&dma->reserved)) {
+	    GLuint current_offset = dma->current_used;
+
+	    assert (dma->current_used +
+		    rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+		    dma->current_vertexptr);
+
+	    if (dma->current_used != dma->current_vertexptr) {
+		    dma->current_used = dma->current_vertexptr;
+
+		    rmesa->vtbl.swtcl_flush(ctx, current_offset);
+	    }
+	    rmesa->swtcl.numverts = 0;
+	}
+}
+/* Alloc space in the current dma region.
+ */
+void *
+rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
+{
+	GLuint bytes = vsize * nverts;
+	void *head;
+	if (RADEON_DEBUG & RADEON_IOCTL)
+		fprintf(stderr, "%s\n", __FUNCTION__);
+	if(is_empty_list(&rmesa->dma.reserved)
+	      ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
+		if (rmesa->dma.flush) {
+			rmesa->dma.flush(rmesa->glCtx);
+		}
+
+                radeonRefillCurrentDmaRegion(rmesa, bytes);
+
+		return NULL;
+	}
+
+        if (!rmesa->dma.flush) {
+		/* if cmdbuf flushed DMA restart */
+                rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+                rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
+        }
+
+	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
+        ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
+        ASSERT( rmesa->dma.current_used +
+                rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+                rmesa->dma.current_vertexptr );
+
+	head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
+	rmesa->dma.current_vertexptr += bytes;
+	rmesa->swtcl.numverts += nverts;
+	return head;
+}
+
+void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
+{
+   radeonContextPtr radeon = RADEON_CONTEXT( ctx );
+   int i;
+	if (RADEON_DEBUG & RADEON_IOCTL)
+		fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (radeon->dma.flush) {
+       radeon->dma.flush(radeon->glCtx);
+   }
+   for (i = 0; i < radeon->tcl.aos_count; i++) {
+      if (radeon->tcl.aos[i].bo) {
+         radeon_bo_unref(radeon->tcl.aos[i].bo);
+         radeon->tcl.aos[i].bo = NULL;
+
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.h b/src/mesa/drivers/dri/radeon/radeon_dma.h
new file mode 100644
index 00000000000..74e653fd189
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_dma.h
@@ -0,0 +1,58 @@
+/**************************************************************************
+
+Copyright (C) 2004 Nicolai Haehnle.
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+#ifndef RADEON_DMA_H
+#define RADEON_DMA_H
+
+void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count);
+void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count);
+void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count);
+void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count);
+
+void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
+			 const GLvoid * data, int size, int stride, int count);
+
+void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes);
+void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size);
+void radeon_init_dma(radeonContextPtr rmesa);
+void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes);
+void radeonAllocDmaRegion(radeonContextPtr rmesa,
+			  struct radeon_bo **pbo, int *poffset,
+			  int bytes, int alignment);
+void radeonReleaseDmaRegions(radeonContextPtr rmesa);
+
+void rcommon_flush_last_swtcl_prim(GLcontext *ctx);
+
+void *rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize);
+void radeonFreeDmaRegions(radeonContextPtr rmesa);
+void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs );
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c
new file mode 100644
index 00000000000..d83b166742c
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c
@@ -0,0 +1,599 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Red Hat Inc.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/fbobject.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/context.h"
+#include "main/texformat.h"
+#include "main/texrender.h"
+#include "drivers/common/meta.h"
+
+#include "radeon_common.h"
+#include "radeon_mipmap_tree.h"
+
+#define FILE_DEBUG_FLAG RADEON_TEXTURE
+#define DBG(...) do {                                           \
+        if (RADEON_DEBUG & FILE_DEBUG_FLAG)                      \
+                _mesa_printf(__VA_ARGS__);                      \
+} while(0)
+
+static struct gl_framebuffer *
+radeon_new_framebuffer(GLcontext *ctx, GLuint name)
+{
+  return _mesa_new_framebuffer(ctx, name);
+}
+
+static void
+radeon_delete_renderbuffer(struct gl_renderbuffer *rb)
+{
+  struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
+
+  ASSERT(rrb);
+
+  if (rrb && rrb->bo) {
+    radeon_bo_unref(rrb->bo);
+  }
+  _mesa_free(rrb);
+}
+
+static void *
+radeon_get_pointer(GLcontext *ctx, struct gl_renderbuffer *rb,
+		   GLint x, GLint y)
+{
+  return NULL;
+}
+
+/**
+ * Called via glRenderbufferStorageEXT() to set the format and allocate
+ * storage for a user-created renderbuffer.
+ */
+static GLboolean
+radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+                                 GLenum internalFormat,
+                                 GLuint width, GLuint height)
+{
+  struct radeon_context *radeon = RADEON_CONTEXT(ctx);
+  struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
+  GLboolean software_buffer = GL_FALSE;
+  int cpp;
+
+   ASSERT(rb->Name != 0);
+  switch (internalFormat) {
+   case GL_R3_G3_B2:
+   case GL_RGB4:
+   case GL_RGB5:
+      rb->_ActualFormat = GL_RGB5;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      rb->RedBits = 5;
+      rb->GreenBits = 6;
+      rb->BlueBits = 5;
+      cpp = 2;
+      break;
+   case GL_RGB:
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      rb->_ActualFormat = GL_RGB8;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      rb->RedBits = 8;
+      rb->GreenBits = 8;
+      rb->BlueBits = 8;
+      rb->AlphaBits = 0;
+      cpp = 4;
+      break;
+   case GL_RGBA:
+   case GL_RGBA2:
+   case GL_RGBA4:
+   case GL_RGB5_A1:
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      rb->_ActualFormat = GL_RGBA8;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      rb->RedBits = 8;
+      rb->GreenBits = 8;
+      rb->BlueBits = 8;
+      rb->AlphaBits = 8;
+      cpp = 4;
+      break;
+   case GL_STENCIL_INDEX:
+   case GL_STENCIL_INDEX1_EXT:
+   case GL_STENCIL_INDEX4_EXT:
+   case GL_STENCIL_INDEX8_EXT:
+   case GL_STENCIL_INDEX16_EXT:
+      /* alloc a depth+stencil buffer */
+      rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+      rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
+      rb->StencilBits = 8;
+      cpp = 4;
+      break;
+   case GL_DEPTH_COMPONENT16:
+      rb->_ActualFormat = GL_DEPTH_COMPONENT16;
+      rb->DataType = GL_UNSIGNED_SHORT;
+      rb->DepthBits = 16;
+      cpp = 2;
+      break;
+   case GL_DEPTH_COMPONENT:
+   case GL_DEPTH_COMPONENT24:
+   case GL_DEPTH_COMPONENT32:
+      rb->_ActualFormat = GL_DEPTH_COMPONENT24;
+      rb->DataType = GL_UNSIGNED_INT;
+      rb->DepthBits = 24;
+      cpp = 4;
+      break;
+   case GL_DEPTH_STENCIL_EXT:
+   case GL_DEPTH24_STENCIL8_EXT:
+      rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+      rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
+      rb->DepthBits = 24;
+      rb->StencilBits = 8;
+      cpp = 4;
+      break;
+   default:
+      _mesa_problem(ctx,
+                    "Unexpected format in intel_alloc_renderbuffer_storage");
+      return GL_FALSE;
+   }
+
+  if (ctx->Driver.Flush)
+	  ctx->Driver.Flush(ctx); /* +r6/r7 */
+
+  if (rrb->bo)
+    radeon_bo_unref(rrb->bo);
+  
+    
+   if (software_buffer) {
+      return _mesa_soft_renderbuffer_storage(ctx, rb, internalFormat,
+                                             width, height);
+   }
+   else {
+     uint32_t size;
+     uint32_t pitch = ((cpp * width + 63) & ~63) / cpp;
+
+     fprintf(stderr,"Allocating %d x %d radeon RBO (pitch %d)\n", width,
+	  height, pitch);
+
+     size = pitch * height * cpp;
+     rrb->pitch = pitch * cpp;
+     rrb->cpp = cpp;
+     rrb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+			      0,
+			      size,
+			      0,
+			      RADEON_GEM_DOMAIN_VRAM,
+			      0);
+     rb->Width = width;
+     rb->Height = height;
+       return GL_TRUE;
+   }    
+   
+}
+
+
+/**
+ * Called for each hardware renderbuffer when a _window_ is resized.
+ * Just update fields.
+ * Not used for user-created renderbuffers!
+ */
+static GLboolean
+radeon_alloc_window_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+                           GLenum internalFormat, GLuint width, GLuint height)
+{
+   ASSERT(rb->Name == 0);
+   rb->Width = width;
+   rb->Height = height;
+   rb->_ActualFormat = internalFormat;
+
+   return GL_TRUE;
+}
+
+
+static void
+radeon_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb,
+		     GLuint width, GLuint height)
+{
+     struct radeon_framebuffer *radeon_fb = (struct radeon_framebuffer*)fb;
+   int i;
+
+   _mesa_resize_framebuffer(ctx, fb, width, height);
+
+   fb->Initialized = GL_TRUE; /* XXX remove someday */
+
+   if (fb->Name != 0) {
+      return;
+   }
+
+   /* Make sure all window system renderbuffers are up to date */
+   for (i = 0; i < 2; i++) {
+      struct gl_renderbuffer *rb = &radeon_fb->color_rb[i]->base;
+
+      /* only resize if size is changing */
+      if (rb && (rb->Width != width || rb->Height != height)) {
+	 rb->AllocStorage(ctx, rb, rb->InternalFormat, width, height);
+      }
+   }
+}
+
+
+/** Dummy function for gl_renderbuffer::AllocStorage() */
+static GLboolean
+radeon_nop_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+			 GLenum internalFormat, GLuint width, GLuint height)
+{
+   _mesa_problem(ctx, "radeon_op_alloc_storage should never be called.");
+   return GL_FALSE;
+}
+
+struct radeon_renderbuffer *
+radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv)
+{
+    struct radeon_renderbuffer *rrb;
+
+    rrb = CALLOC_STRUCT(radeon_renderbuffer);
+    if (!rrb)
+	return NULL;
+
+    _mesa_init_renderbuffer(&rrb->base, 0);
+    rrb->base.ClassID = RADEON_RB_CLASS;
+
+    /* XXX format junk */
+    switch (format) {
+	case GL_RGB5:
+	    rrb->base._ActualFormat = GL_RGB5;
+	    rrb->base._BaseFormat = GL_RGBA;
+	    rrb->base.RedBits = 5;
+	    rrb->base.GreenBits = 6;
+	    rrb->base.BlueBits = 5;
+	    rrb->base.DataType = GL_UNSIGNED_BYTE;
+	    break;
+	case GL_RGB8:
+	    rrb->base._ActualFormat = GL_RGB8;
+	    rrb->base._BaseFormat = GL_RGB;
+	    rrb->base.RedBits = 8;
+	    rrb->base.GreenBits = 8;
+	    rrb->base.BlueBits = 8;
+	    rrb->base.AlphaBits = 0;
+	    rrb->base.DataType = GL_UNSIGNED_BYTE;
+	    break;
+	case GL_RGBA8:
+	    rrb->base._ActualFormat = GL_RGBA8;
+	    rrb->base._BaseFormat = GL_RGBA;
+	    rrb->base.RedBits = 8;
+	    rrb->base.GreenBits = 8;
+	    rrb->base.BlueBits = 8;
+	    rrb->base.AlphaBits = 8;
+	    rrb->base.DataType = GL_UNSIGNED_BYTE;
+	    break;
+	case GL_STENCIL_INDEX8_EXT:
+	    rrb->base._ActualFormat = GL_STENCIL_INDEX8_EXT;
+	    rrb->base._BaseFormat = GL_STENCIL_INDEX;
+	    rrb->base.StencilBits = 8;
+	    rrb->base.DataType = GL_UNSIGNED_BYTE;
+	    break;
+	case GL_DEPTH_COMPONENT16:
+	    rrb->base._ActualFormat = GL_DEPTH_COMPONENT16;
+	    rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
+	    rrb->base.DepthBits = 16;
+	    rrb->base.DataType = GL_UNSIGNED_SHORT;
+	    break;
+	case GL_DEPTH_COMPONENT24:
+	    rrb->base._ActualFormat = GL_DEPTH_COMPONENT24;
+	    rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
+	    rrb->base.DepthBits = 24;
+	    rrb->base.DataType = GL_UNSIGNED_INT;
+	    break;
+	case GL_DEPTH24_STENCIL8_EXT:
+	    rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+	    rrb->base._BaseFormat = GL_DEPTH_STENCIL_EXT;
+	    rrb->base.DepthBits = 24;
+	    rrb->base.StencilBits = 8;
+	    rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
+	    break;
+	default:
+	    fprintf(stderr, "%s: Unknown format 0x%04x\n", __FUNCTION__, format);
+	    _mesa_delete_renderbuffer(&rrb->base);
+	    return NULL;
+    }
+
+    rrb->dPriv = driDrawPriv;
+    rrb->base.InternalFormat = format;
+
+    rrb->base.Delete = radeon_delete_renderbuffer;
+    rrb->base.AllocStorage = radeon_alloc_window_storage;
+    rrb->base.GetPointer = radeon_get_pointer;
+
+    rrb->bo = NULL;
+    return rrb;
+}
+
+static struct gl_renderbuffer *
+radeon_new_renderbuffer(GLcontext * ctx, GLuint name)
+{
+  struct radeon_renderbuffer *rrb;
+
+  rrb = CALLOC_STRUCT(radeon_renderbuffer);
+  if (!rrb)
+    return NULL;
+
+  _mesa_init_renderbuffer(&rrb->base, name);
+  rrb->base.ClassID = RADEON_RB_CLASS;
+
+  rrb->base.Delete = radeon_delete_renderbuffer;
+  rrb->base.AllocStorage = radeon_alloc_renderbuffer_storage;
+  rrb->base.GetPointer = radeon_get_pointer;
+
+  return &rrb->base;
+}
+
+static void
+radeon_bind_framebuffer(GLcontext * ctx, GLenum target,
+                       struct gl_framebuffer *fb, struct gl_framebuffer *fbread)
+{
+   if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) {
+      radeon_draw_buffer(ctx, fb);
+   }
+   else {
+      /* don't need to do anything if target == GL_READ_FRAMEBUFFER_EXT */
+   }
+}
+
+static void
+radeon_framebuffer_renderbuffer(GLcontext * ctx,
+                               struct gl_framebuffer *fb,
+                               GLenum attachment, struct gl_renderbuffer *rb)
+{
+
+	if (ctx->Driver.Flush)
+		ctx->Driver.Flush(ctx); /* +r6/r7 */
+
+   _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb);
+   radeon_draw_buffer(ctx, fb);
+}
+
+
+static GLboolean
+radeon_update_wrapper(GLcontext *ctx, struct radeon_renderbuffer *rrb, 
+		     struct gl_texture_image *texImage)
+{
+	int retry = 0;
+restart:
+	if (texImage->TexFormat == &_mesa_texformat_argb8888) {
+		rrb->cpp = 4;
+		rrb->base._ActualFormat = GL_RGBA8;
+		rrb->base._BaseFormat = GL_RGBA;
+		rrb->base.DataType = GL_UNSIGNED_BYTE;
+		DBG("Render to RGBA8 texture OK\n");
+	}
+	else if (texImage->TexFormat == &_mesa_texformat_rgb565) {
+		rrb->cpp = 2;
+		rrb->base._ActualFormat = GL_RGB5;
+		rrb->base._BaseFormat = GL_RGB;
+		rrb->base.DataType = GL_UNSIGNED_BYTE;
+		DBG("Render to RGB5 texture OK\n");
+	}
+	else if (texImage->TexFormat == &_mesa_texformat_argb1555) {
+		rrb->cpp = 2;
+		rrb->base._ActualFormat = GL_RGB5_A1;
+		rrb->base._BaseFormat = GL_RGBA;
+		rrb->base.DataType = GL_UNSIGNED_BYTE;
+		DBG("Render to ARGB1555 texture OK\n");
+	}
+	else if (texImage->TexFormat == &_mesa_texformat_argb4444) {
+		rrb->cpp = 2;
+		rrb->base._ActualFormat = GL_RGBA4;
+		rrb->base._BaseFormat = GL_RGBA;
+		rrb->base.DataType = GL_UNSIGNED_BYTE;
+		DBG("Render to ARGB1555 texture OK\n");
+	}
+	else if (texImage->TexFormat == &_mesa_texformat_z16) {
+		rrb->cpp = 2;
+		rrb->base._ActualFormat = GL_DEPTH_COMPONENT16;
+		rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
+		rrb->base.DataType = GL_UNSIGNED_SHORT;
+		DBG("Render to DEPTH16 texture OK\n");
+	}
+	else if (texImage->TexFormat == &_mesa_texformat_s8_z24) {
+		rrb->cpp = 4;
+		rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+		rrb->base._BaseFormat = GL_DEPTH_STENCIL_EXT;
+		rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
+		DBG("Render to DEPTH_STENCIL texture OK\n");
+	}
+	else {
+		/* try redoing the FBO */
+		if (retry == 1) {
+			DBG("Render to texture BAD FORMAT %d\n",
+			    texImage->TexFormat->MesaFormat);
+			return GL_FALSE;
+		}
+		texImage->TexFormat = radeonChooseTextureFormat(ctx, texImage->InternalFormat, 0,
+								texImage->TexFormat->DataType,
+								1);
+
+		retry++;
+		goto restart;
+	}
+	
+	rrb->pitch = texImage->Width * rrb->cpp;
+	rrb->base.InternalFormat = rrb->base._ActualFormat;
+	rrb->base.Width = texImage->Width;
+	rrb->base.Height = texImage->Height;
+	rrb->base.RedBits = texImage->TexFormat->RedBits;
+	rrb->base.GreenBits = texImage->TexFormat->GreenBits;
+	rrb->base.BlueBits = texImage->TexFormat->BlueBits;
+	rrb->base.AlphaBits = texImage->TexFormat->AlphaBits;
+	rrb->base.DepthBits = texImage->TexFormat->DepthBits;
+	rrb->base.StencilBits = texImage->TexFormat->StencilBits;
+	
+	rrb->base.Delete = radeon_delete_renderbuffer;
+	rrb->base.AllocStorage = radeon_nop_alloc_storage;
+	
+	return GL_TRUE;
+}
+
+
+static struct radeon_renderbuffer *
+radeon_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage)
+{
+  const GLuint name = ~0;   /* not significant, but distinct for debugging */
+  struct radeon_renderbuffer *rrb;
+
+   /* make an radeon_renderbuffer to wrap the texture image */
+   rrb = CALLOC_STRUCT(radeon_renderbuffer);
+   if (!rrb) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glFramebufferTexture");
+      return NULL;
+   }
+
+   _mesa_init_renderbuffer(&rrb->base, name);
+   rrb->base.ClassID = RADEON_RB_CLASS;
+
+   if (!radeon_update_wrapper(ctx, rrb, texImage)) {
+      _mesa_free(rrb);
+      return NULL;
+   }
+
+   return rrb;
+  
+}
+static void
+radeon_render_texture(GLcontext * ctx,
+                     struct gl_framebuffer *fb,
+                     struct gl_renderbuffer_attachment *att)
+{
+   struct gl_texture_image *newImage
+      = att->Texture->Image[att->CubeMapFace][att->TextureLevel];
+   struct radeon_renderbuffer *rrb = radeon_renderbuffer(att->Renderbuffer);
+   radeon_texture_image *radeon_image;
+   GLuint imageOffset;
+
+   (void) fb;
+
+   ASSERT(newImage);
+
+   if (newImage->Border != 0) {
+      /* Fallback on drawing to a texture with a border, which won't have a
+       * miptree.
+       */
+      _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
+      _mesa_render_texture(ctx, fb, att);
+      return;
+   }
+   else if (!rrb) {
+      rrb = radeon_wrap_texture(ctx, newImage);
+      if (rrb) {
+         /* bind the wrapper to the attachment point */
+         _mesa_reference_renderbuffer(&att->Renderbuffer, &rrb->base);
+      }
+      else {
+         /* fallback to software rendering */
+         _mesa_render_texture(ctx, fb, att);
+         return;
+      }
+   }
+
+   if (!radeon_update_wrapper(ctx, rrb, newImage)) {
+       _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
+       _mesa_render_texture(ctx, fb, att);
+       return;
+   }
+
+   DBG("Begin render texture tid %x tex=%u w=%d h=%d refcount=%d\n",
+       _glthread_GetID(),
+       att->Texture->Name, newImage->Width, newImage->Height,
+       rrb->base.RefCount);
+
+   /* point the renderbufer's region to the texture image region */
+   radeon_image = (radeon_texture_image *)newImage;
+   if (rrb->bo != radeon_image->mt->bo) {
+      if (rrb->bo)
+  	radeon_bo_unref(rrb->bo);
+      rrb->bo = radeon_image->mt->bo;
+      radeon_bo_ref(rrb->bo);
+   }
+
+   /* compute offset of the particular 2D image within the texture region */
+   imageOffset = radeon_miptree_image_offset(radeon_image->mt,
+                                            att->CubeMapFace,
+                                            att->TextureLevel);
+
+   if (att->Texture->Target == GL_TEXTURE_3D) {
+      GLuint offsets[6];
+      radeon_miptree_depth_offsets(radeon_image->mt, att->TextureLevel,
+				   offsets);
+      imageOffset += offsets[att->Zoffset];
+   }
+
+   /* store that offset in the region */
+   rrb->draw_offset = imageOffset;
+
+   /* update drawing region, etc */
+   radeon_draw_buffer(ctx, fb);
+}
+
+static void
+radeon_finish_render_texture(GLcontext * ctx,
+                            struct gl_renderbuffer_attachment *att)
+{
+
+}
+static void
+radeon_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
+{
+}
+
+void radeon_fbo_init(struct radeon_context *radeon)
+{
+  radeon->glCtx->Driver.NewFramebuffer = radeon_new_framebuffer;
+  radeon->glCtx->Driver.NewRenderbuffer = radeon_new_renderbuffer;
+  radeon->glCtx->Driver.BindFramebuffer = radeon_bind_framebuffer;
+  radeon->glCtx->Driver.FramebufferRenderbuffer = radeon_framebuffer_renderbuffer;
+  radeon->glCtx->Driver.RenderTexture = radeon_render_texture;
+  radeon->glCtx->Driver.FinishRenderTexture = radeon_finish_render_texture;
+  radeon->glCtx->Driver.ResizeBuffers = radeon_resize_buffers;
+  radeon->glCtx->Driver.ValidateFramebuffer = radeon_validate_framebuffer;
+  radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_blit_framebuffer;
+}
+
+  
+void radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb,
+				struct radeon_bo *bo)
+{
+  struct radeon_bo *old;
+  old = rb->bo;
+  rb->bo = bo;
+  radeon_bo_ref(bo);
+  if (old)
+    radeon_bo_unref(old);
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
index 09acf6b4f85..a0106d00fa2 100644
--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
@@ -35,7 +35,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include <sched.h>
-#include <errno.h> 
+#include <errno.h>
+
+#include "main/attrib.h"
+#include "main/enable.h"
+#include "main/blend.h"
+#include "main/bufferobj.h"
+#include "main/buffers.h"
+#include "main/depth.h"
+#include "main/shaders.h"
+#include "main/texstate.h"
+#include "main/varray.h"
+#include "glapi/dispatch.h"
+#include "swrast/swrast.h"
+#include "main/stencil.h"
+#include "main/matrix.h"
 
 #include "main/glheader.h"
 #include "main/imports.h"
@@ -43,6 +57,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "swrast/swrast.h"
 
 #include "radeon_context.h"
+#include "radeon_common.h"
 #include "radeon_state.h"
 #include "radeon_ioctl.h"
 #include "radeon_tcl.h"
@@ -58,75 +73,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define RADEON_IDLE_RETRY           16
 
 
-static void radeonWaitForIdle( radeonContextPtr rmesa );
-static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
-				    const char * caller );
-
-static void print_state_atom( struct radeon_state_atom *state )
-{
-   int i;
-
-   fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
-
-   if (RADEON_DEBUG & DEBUG_VERBOSE) 
-      for (i = 0 ; i < state->cmd_size ; i++) 
-	 fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
-
-}
-
-static void radeonSaveHwState( radeonContextPtr rmesa )
-{
-   struct radeon_state_atom *atom;
-   char * dest = rmesa->backup_store.cmd_buf;
-
-   if (RADEON_DEBUG & DEBUG_STATE)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-   
-   rmesa->backup_store.cmd_used = 0;
-
-   foreach( atom, &rmesa->hw.atomlist ) {
-      if ( atom->check( rmesa->glCtx ) ) {
-	 int size = atom->cmd_size * 4;
-	 memcpy( dest, atom->cmd, size);
-	 dest += size;
-	 rmesa->backup_store.cmd_used += size;
-	 if (RADEON_DEBUG & DEBUG_STATE)
-	    print_state_atom( atom );
-      }
-   }
-
-   assert( rmesa->backup_store.cmd_used <= RADEON_CMD_BUF_SZ );
-   if (RADEON_DEBUG & DEBUG_STATE)
-      fprintf(stderr, "Returning to radeonEmitState\n");
-}
-
-/* At this point we were in FlushCmdBufLocked but we had lost our context, so
- * we need to unwire our current cmdbuf, hook the one with the saved state in
- * it, flush it, and then put the current one back.  This is so commands at the
- * start of a cmdbuf can rely on the state being kept from the previous one.
- */
-static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
-{
-   GLuint nr_released_bufs;
-   struct radeon_store saved_store;
-
-   if (rmesa->backup_store.cmd_used == 0)
-      return;
-
-   if (RADEON_DEBUG & DEBUG_STATE)
-      fprintf(stderr, "Emitting backup state on lost context\n");
-
-   rmesa->lost_context = GL_FALSE;
-
-   nr_released_bufs = rmesa->dma.nr_released_bufs;
-   saved_store = rmesa->store;
-   rmesa->dma.nr_released_bufs = 0;
-   rmesa->store = rmesa->backup_store;
-   radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
-   rmesa->dma.nr_released_bufs = nr_released_bufs;
-   rmesa->store = saved_store;
-}
-
 /* =============================================================
  * Kernel command buffer handling
  */
@@ -134,965 +80,382 @@ static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
 /* The state atoms will be emitted in the order they appear in the atom list,
  * so this step is important.
  */
-void radeonSetUpAtomList( radeonContextPtr rmesa )
+void radeonSetUpAtomList( r100ContextPtr rmesa )
 {
-   int i, mtu = rmesa->glCtx->Const.MaxTextureUnits;
-
-   make_empty_list(&rmesa->hw.atomlist);
-   rmesa->hw.atomlist.name = "atom-list";
-
-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ctx);
-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.set);
-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lin);
-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msk);
-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.vpt);
-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tcl);
-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msc);
+   int i, mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
+
+   make_empty_list(&rmesa->radeon.hw.atomlist);
+   rmesa->radeon.hw.atomlist.name = "atom-list";
+
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ctx);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.set);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lin);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msk);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.vpt);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tcl);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msc);
    for (i = 0; i < mtu; ++i) {
-       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tex[i]);
-       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.txr[i]);
-       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.cube[i]);
+       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i]);
+       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.txr[i]);
+       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i]);
    }
-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.zbs);
-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mtl);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.zbs);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mtl);
    for (i = 0; i < 3 + mtu; ++i)
-      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mat[i]);
+      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i]);
    for (i = 0; i < 8; ++i)
-      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lit[i]);
+      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i]);
    for (i = 0; i < 6; ++i)
-      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ucp[i]);
-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.eye);
-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.grd);
-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.fog);
-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.glt);
+      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i]);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.eye);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.grd);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.fog);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.glt);
 }
 
-void radeonEmitState( radeonContextPtr rmesa )
+static void radeonEmitScissor(r100ContextPtr rmesa)
 {
-   struct radeon_state_atom *atom;
-   char *dest;
-
-   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   if (rmesa->save_on_next_emit) {
-      radeonSaveHwState(rmesa);
-      rmesa->save_on_next_emit = GL_FALSE;
-   }
-
-   /* this code used to return here but now it emits zbs */
-
-   /* To avoid going across the entire set of states multiple times, just check
-    * for enough space for the case of emitting all state, and inline the
-    * radeonAllocCmdBuf code here without all the checks.
-    */
-   radeonEnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size);
-   dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
-
-   /* We always always emit zbs, this is due to a bug found by keithw in
-      the hardware and rediscovered after Erics changes by me.
-      if you ever touch this code make sure you emit zbs otherwise
-      you get tcl lockups on at least M7/7500 class of chips - airlied */
-   rmesa->hw.zbs.dirty=1;
-
-   if (RADEON_DEBUG & DEBUG_STATE) {
-      foreach(atom, &rmesa->hw.atomlist) {
-	 if (atom->dirty || rmesa->hw.all_dirty) {
-	    if (atom->check(rmesa->glCtx))
-	       print_state_atom(atom);
-	    else
-	       fprintf(stderr, "skip state %s\n", atom->name);
-	 }
-      }
-   }
-
-   foreach(atom, &rmesa->hw.atomlist) {
-      if (rmesa->hw.all_dirty)
-	 atom->dirty = GL_TRUE;
-      if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) &&
-	   atom->is_tcl)
-	 atom->dirty = GL_FALSE;
-      if (atom->dirty) {
-	 if (atom->check(rmesa->glCtx)) {
-	    int size = atom->cmd_size * 4;
-	    memcpy(dest, atom->cmd, size);
-	    dest += size;
-	    rmesa->store.cmd_used += size;
-	    atom->dirty = GL_FALSE;
-	 }
-      }
-   }
-
-   assert(rmesa->store.cmd_used <= RADEON_CMD_BUF_SZ);
- 
-   rmesa->hw.is_dirty = GL_FALSE;
-   rmesa->hw.all_dirty = GL_FALSE;
+    BATCH_LOCALS(&rmesa->radeon);
+    if (!rmesa->radeon.radeonScreen->kernel_mm) {
+       return;
+    }
+    if (rmesa->radeon.state.scissor.enabled) {
+        BEGIN_BATCH(6);
+        OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0));
+        OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] | RADEON_SCISSOR_ENABLE);
+        OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
+        OUT_BATCH((rmesa->radeon.state.scissor.rect.y1 << 16) |
+                  rmesa->radeon.state.scissor.rect.x1);
+        OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
+        OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2) << 16) |
+                  (rmesa->radeon.state.scissor.rect.x2));
+        END_BATCH();
+    } else {
+        BEGIN_BATCH(2);
+        OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0));
+        OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & ~RADEON_SCISSOR_ENABLE);
+        END_BATCH();
+    }
 }
 
 /* Fire a section of the retained (indexed_verts) buffer as a regular
- * primtive.  
+ * primtive.
  */
-extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
+extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
 				GLuint vertex_format,
 				GLuint primitive,
 				GLuint vertex_nr )
 {
-   drm_radeon_cmd_header_t *cmd;
-
+   BATCH_LOCALS(&rmesa->radeon);
 
    assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
-   
-   radeonEmitState( rmesa );
-
-   if (RADEON_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
-	      rmesa->store.cmd_used/4);
-   
-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VBUF_BUFSZ,
-						       __FUNCTION__ );
+
+   radeonEmitState(&rmesa->radeon);
+   radeonEmitScissor(rmesa);
+
 #if RADEON_OLD_PACKETS
-   cmd[0].i = 0;
-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
-   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
-   cmd[2].i = rmesa->ioctl.vertex_offset;
-   cmd[3].i = vertex_nr;
-   cmd[4].i = vertex_format;
-   cmd[5].i = (primitive | 
-	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
-	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
-	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
-
-   if (RADEON_DEBUG & DEBUG_PRIMS)
-      fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n",
-	      __FUNCTION__,
-	      cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
+   BEGIN_BATCH(8);
+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 3);
+   if (!rmesa->radeon.radeonScreen->kernel_mm) {
+     OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+   } else {
+     OUT_BATCH(rmesa->ioctl.vertex_offset);
+   }
+
+   OUT_BATCH(vertex_nr);
+   OUT_BATCH(vertex_format);
+   OUT_BATCH(primitive |  RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+     radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+			   rmesa->ioctl.bo,
+			   RADEON_GEM_DOMAIN_GTT,
+			   0, 0);
+   }
+
+   END_BATCH();
+
 #else
-   cmd[0].i = 0;
-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
-   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
-   cmd[2].i = vertex_format;
-   cmd[3].i = (primitive | 
-	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
-	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
-	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
-	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
-
-
-   if (RADEON_DEBUG & DEBUG_PRIMS)
-      fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n",
-	      __FUNCTION__,
-	      cmd[1].i, cmd[2].i, cmd[3].i);
+   BEGIN_BATCH(4);
+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1);
+   OUT_BATCH(vertex_format);
+   OUT_BATCH(primitive |
+	     RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+   END_BATCH();
 #endif
 }
 
-
-void radeonFlushElts( radeonContextPtr rmesa )
+void radeonFlushElts( GLcontext *ctx )
 {
-   int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
-   int dwords;
-#if RADEON_OLD_PACKETS
-   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2;
-#else
-   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2;
-#endif
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&rmesa->radeon);
+   int nr;
+   uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start);
+   int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw);
 
-   if (RADEON_DEBUG & DEBUG_IOCTL)
+   if (RADEON_DEBUG & RADEON_IOCTL)
       fprintf(stderr, "%s\n", __FUNCTION__);
 
-   assert( rmesa->dma.flush == radeonFlushElts );
-   rmesa->dma.flush = NULL;
+   assert( rmesa->radeon.dma.flush == radeonFlushElts );
+   rmesa->radeon.dma.flush = NULL;
 
-   /* Cope with odd number of elts:
-    */
-   rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
-   dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
+   nr = rmesa->tcl.elt_used;
 
 #if RADEON_OLD_PACKETS
-   cmd[1] |= (dwords - 3) << 16;
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+     dwords -= 2;
+   }
+#endif
+
+#if RADEON_OLD_PACKETS
+   cmd[1] |= (dwords + 3) << 16;
    cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
 #else
-   cmd[1] |= (dwords - 3) << 16;
+   cmd[1] |= (dwords + 2) << 16;
    cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
 #endif
 
-   if (RADEON_DEBUG & DEBUG_SYNC) {
+   rmesa->radeon.cmdbuf.cs->cdw += dwords;
+   rmesa->radeon.cmdbuf.cs->section_cdw += dwords;
+
+#if RADEON_OLD_PACKETS
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+      radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+			    rmesa->ioctl.bo,
+			    RADEON_GEM_DOMAIN_GTT,
+			    0, 0);
+   }
+#endif
+
+   END_BATCH();
+
+   if (RADEON_DEBUG & RADEON_SYNC) {
       fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
-      radeonFinish( rmesa->glCtx );
+      radeonFinish( rmesa->radeon.glCtx );
    }
-}
 
+}
 
-GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
+GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
 				    GLuint vertex_format,
 				    GLuint primitive,
 				    GLuint min_nr )
 {
-   drm_radeon_cmd_header_t *cmd;
    GLushort *retval;
+   int align_min_nr;
+   BATCH_LOCALS(&rmesa->radeon);
 
-   if (RADEON_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr);
+   if (RADEON_DEBUG & RADEON_IOCTL)
+      fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
 
    assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
-   
-   radeonEmitState( rmesa );
-   
-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa,
-						       ELTS_BUFSZ(min_nr),
-						       __FUNCTION__ );
+
+   radeonEmitState(&rmesa->radeon);
+   radeonEmitScissor(rmesa);
+
+   rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw;
+
+   /* round up min_nr to align the state */
+   align_min_nr = (min_nr + 1) & ~1;
+
 #if RADEON_OLD_PACKETS
-   cmd[0].i = 0;
-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
-   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;
-   cmd[2].i = rmesa->ioctl.vertex_offset;
-   cmd[3].i = 0xffff;
-   cmd[4].i = vertex_format;
-   cmd[5].i = (primitive | 
-	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
-	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
-
-   retval = (GLushort *)(cmd+6);
-#else   
-   cmd[0].i = 0;
-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
-   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
-   cmd[2].i = vertex_format;
-   cmd[3].i = (primitive | 
-	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
-	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
-	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
-	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
-
-   retval = (GLushort *)(cmd+4);
+   BEGIN_BATCH_NO_AUTOSTATE(2+ELTS_BUFSZ(align_min_nr)/4);
+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 0);
+   if (!rmesa->radeon.radeonScreen->kernel_mm) {
+     OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+   } else {
+     OUT_BATCH(rmesa->ioctl.vertex_offset);
+   }
+   OUT_BATCH(rmesa->ioctl.vertex_max);
+   OUT_BATCH(vertex_format);
+   OUT_BATCH(primitive |
+	     RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+#else
+   BEGIN_BATCH_NO_AUTOSTATE(ELTS_BUFSZ(align_min_nr)/4);
+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0);
+   OUT_BATCH(vertex_format);
+   OUT_BATCH(primitive |
+	     RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
 #endif
 
-   if (RADEON_DEBUG & DEBUG_PRIMS)
-      fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n",
-	      __FUNCTION__,
-	      cmd[1].i, vertex_format, primitive);
 
-   assert(!rmesa->dma.flush);
-   rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-   rmesa->dma.flush = radeonFlushElts;
+   rmesa->tcl.elt_cmd_offset = rmesa->radeon.cmdbuf.cs->cdw;
+   rmesa->tcl.elt_used = min_nr;
 
-   rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;
+   retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset);
 
-   return retval;
-}
+   if (RADEON_DEBUG & RADEON_RENDER)
+      fprintf(stderr, "%s: header prim %x \n",
+	      __FUNCTION__, primitive);
 
+   assert(!rmesa->radeon.dma.flush);
+   rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+   rmesa->radeon.dma.flush = radeonFlushElts;
 
+   return retval;
+}
 
-void radeonEmitVertexAOS( radeonContextPtr rmesa,
+void radeonEmitVertexAOS( r100ContextPtr rmesa,
 			  GLuint vertex_size,
+			  struct radeon_bo *bo,
 			  GLuint offset )
 {
 #if RADEON_OLD_PACKETS
-   rmesa->ioctl.vertex_size = vertex_size;
    rmesa->ioctl.vertex_offset = offset;
+   rmesa->ioctl.bo = bo;
 #else
-   drm_radeon_cmd_header_t *cmd;
+   BATCH_LOCALS(&rmesa->radeon);
 
-   if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
+   if (RADEON_DEBUG & (RADEON_PRIMS|DEBUG_IOCTL))
       fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
 	      __FUNCTION__, vertex_size, offset);
 
-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
-						  __FUNCTION__ );
+   BEGIN_BATCH(7);
+   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2);
+   OUT_BATCH(1);
+   OUT_BATCH(vertex_size | (vertex_size << 8));
+   OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+   END_BATCH();
 
-   cmd[0].i = 0;
-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
-   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16);
-   cmd[2].i = 1;
-   cmd[3].i = vertex_size | (vertex_size << 8);
-   cmd[4].i = offset;
 #endif
 }
-		       
 
-void radeonEmitAOS( radeonContextPtr rmesa,
-		    struct radeon_dma_region **component,
+
+void radeonEmitAOS( r100ContextPtr rmesa,
 		    GLuint nr,
 		    GLuint offset )
 {
 #if RADEON_OLD_PACKETS
    assert( nr == 1 );
-   assert( component[0]->aos_size == component[0]->aos_stride );
-   rmesa->ioctl.vertex_size = component[0]->aos_size;
-   rmesa->ioctl.vertex_offset = 
-      (component[0]->aos_start + offset * component[0]->aos_stride * 4);
+   rmesa->ioctl.bo = rmesa->radeon.tcl.aos[0].bo;
+   rmesa->ioctl.vertex_offset =
+     (rmesa->radeon.tcl.aos[0].offset + offset * rmesa->radeon.tcl.aos[0].stride * 4);
+   rmesa->ioctl.vertex_max = rmesa->radeon.tcl.aos[0].count;
 #else
-   drm_radeon_cmd_header_t *cmd;
-   int sz = AOS_BUFSZ(nr);
+   BATCH_LOCALS(&rmesa->radeon);
+   uint32_t voffset;
+   //   int sz = AOS_BUFSZ(nr);
+   int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
    int i;
-   int *tmp;
 
-   if (RADEON_DEBUG & DEBUG_IOCTL)
+   if (RADEON_DEBUG & RADEON_IOCTL)
       fprintf(stderr, "%s\n", __FUNCTION__);
 
-
-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sz,
-						  __FUNCTION__ );
-   cmd[0].i = 0;
-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
-   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (((sz / sizeof(int))-3) << 16);
-   cmd[2].i = nr;
-   tmp = &cmd[0].i;
-   cmd += 3;
-
-   for (i = 0 ; i < nr ; i++) {
-      if (i & 1) {
-	 cmd[0].i |= ((component[i]->aos_stride << 24) | 
-		      (component[i]->aos_size << 16));
-	 cmd[2].i = (component[i]->aos_start + 
-		     offset * component[i]->aos_stride * 4);
-	 cmd += 3;
-      }
-      else {
-	 cmd[0].i = ((component[i]->aos_stride << 8) | 
-		     (component[i]->aos_size << 0));
-	 cmd[1].i = (component[i]->aos_start + 
-		     offset * component[i]->aos_stride * 4);
+   BEGIN_BATCH(sz+2+(nr * 2));
+   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1);
+   OUT_BATCH(nr);
+
+   if (!rmesa->radeon.radeonScreen->kernel_mm) {
+      for (i = 0; i + 1 < nr; i += 2) {
+	 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+		   (rmesa->radeon.tcl.aos[i].stride << 8) |
+		   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+		   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+	 OUT_BATCH_RELOC(voffset,
+			 rmesa->radeon.tcl.aos[i].bo,
+			 voffset,
+			 RADEON_GEM_DOMAIN_GTT,
+			 0, 0);
+	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+	 OUT_BATCH_RELOC(voffset,
+			 rmesa->radeon.tcl.aos[i+1].bo,
+			 voffset,
+			 RADEON_GEM_DOMAIN_GTT,
+			 0, 0);
       }
-   }
 
-   if (RADEON_DEBUG & DEBUG_VERTS) {
-      fprintf(stderr, "%s:\n", __FUNCTION__);
-      for (i = 0 ; i < sz ; i++)
-	 fprintf(stderr, "   %d: %x\n", i, tmp[i]);
-   }
-#endif
-}
-
-/* using already shifted color_fmt! */
-void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is required? */
-		   GLuint color_fmt,
-		   GLuint src_pitch,
-		   GLuint src_offset,
-		   GLuint dst_pitch,
-		   GLuint dst_offset,
-		   GLint srcx, GLint srcy,
-		   GLint dstx, GLint dsty,
-		   GLuint w, GLuint h )
-{
-   drm_radeon_cmd_header_t *cmd;
-
-   if (RADEON_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
-	      __FUNCTION__, 
-	      src_pitch, src_offset, srcx, srcy,
-	      dst_pitch, dst_offset, dstx, dsty,
-	      w, h);
-
-   assert( (src_pitch & 63) == 0 );
-   assert( (dst_pitch & 63) == 0 );
-   assert( (src_offset & 1023) == 0 ); 
-   assert( (dst_offset & 1023) == 0 ); 
-   assert( w < (1<<16) );
-   assert( h < (1<<16) );
-
-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int),
-						  __FUNCTION__ );
-
-
-   cmd[0].i = 0;
-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
-   cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16);
-   cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
-	       RADEON_GMC_DST_PITCH_OFFSET_CNTL |
-	       RADEON_GMC_BRUSH_NONE |
-	       color_fmt |
-	       RADEON_GMC_SRC_DATATYPE_COLOR |
-	       RADEON_ROP3_S |
-	       RADEON_DP_SRC_SOURCE_MEMORY |
-	       RADEON_GMC_CLR_CMP_CNTL_DIS |
-	       RADEON_GMC_WR_MSK_DIS );
-
-   cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
-   cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
-   cmd[5].i = (srcx << 16) | srcy;
-   cmd[6].i = (dstx << 16) | dsty; /* dst */
-   cmd[7].i = (w << 16) | h;
-}
-
-
-void radeonEmitWait( radeonContextPtr rmesa, GLuint flags )
-{
-   drm_radeon_cmd_header_t *cmd;
-
-   assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );
-
-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int),
-					   __FUNCTION__ );
-   cmd[0].i = 0;
-   cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
-   cmd[0].wait.flags = flags;
-}
-
-
-static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
-				    const char * caller )
-{
-   int ret, i;
-   drm_radeon_cmd_buffer_t cmd;
-
-   if (rmesa->lost_context)
-      radeonBackUpAndEmitLostStateLocked(rmesa);
-
-   if (RADEON_DEBUG & DEBUG_IOCTL) {
-      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
-
-      if (RADEON_DEBUG & DEBUG_VERBOSE) 
-	 for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
-	    fprintf(stderr, "%d: %x\n", i/4, 
-		    *(int *)(&rmesa->store.cmd_buf[i]));
-   }
-
-   if (RADEON_DEBUG & DEBUG_DMA)
-      fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
-	      rmesa->dma.nr_released_bufs);
-
-
-   if (RADEON_DEBUG & DEBUG_SANITY) {
-      if (rmesa->state.scissor.enabled) 
-	 ret = radeonSanityCmdBuffer( rmesa, 
-				      rmesa->state.scissor.numClipRects,
-				      rmesa->state.scissor.pClipRects);
-      else
-	 ret = radeonSanityCmdBuffer( rmesa, 
-				      rmesa->numClipRects,
-				      rmesa->pClipRects);
-      if (ret) {
-	 fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);	 
-	 goto out;
+      if (nr & 1) {
+	 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+		   (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+	 OUT_BATCH_RELOC(voffset,
+			 rmesa->radeon.tcl.aos[nr - 1].bo,
+			 voffset,
+			 RADEON_GEM_DOMAIN_GTT,
+			 0, 0);
       }
-   }
-
-
-   cmd.bufsz = rmesa->store.cmd_used;
-   cmd.buf = rmesa->store.cmd_buf;
-
-   if (rmesa->state.scissor.enabled) {
-      cmd.nbox = rmesa->state.scissor.numClipRects;
-      cmd.boxes = rmesa->state.scissor.pClipRects;
    } else {
-      cmd.nbox = rmesa->numClipRects;
-      cmd.boxes = rmesa->pClipRects;
-   }
-
-   ret = drmCommandWrite( rmesa->dri.fd,
-			  DRM_RADEON_CMDBUF,
-			  &cmd, sizeof(cmd) );
-
-   if (ret)
-      fprintf(stderr, "drmCommandWrite: %d\n", ret);
-
-   if (RADEON_DEBUG & DEBUG_SYNC) {
-      fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__);
-      radeonWaitForIdleLocked( rmesa );
-   }
-
- out:
-   rmesa->store.primnr = 0;
-   rmesa->store.statenr = 0;
-   rmesa->store.cmd_used = 0;
-   rmesa->dma.nr_released_bufs = 0;
-   rmesa->save_on_next_emit = 1;
-
-   return ret;
-}
-
-
-/* Note: does not emit any commands to avoid recursion on
- * radeonAllocCmdBuf.
- */
-void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller )
-{
-   int ret;
-
-	      
-   LOCK_HARDWARE( rmesa );
-
-   ret = radeonFlushCmdBufLocked( rmesa, caller );
-
-   UNLOCK_HARDWARE( rmesa );
-
-   if (ret) {
-      fprintf(stderr, "drm_radeon_cmd_buffer_t: %d (exiting)\n", ret);
-      exit(ret);
-   }
-}
-
-/* =============================================================
- * Hardware vertex buffer handling
- */
-
-
-void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa )
-{
-   struct radeon_dma_buffer *dmabuf;
-   int fd = rmesa->dri.fd;
-   int index = 0;
-   int size = 0;
-   drmDMAReq dma;
-   int ret;
-
-   if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
-      fprintf(stderr, "%s\n", __FUNCTION__);  
-
-   if (rmesa->dma.flush) {
-      rmesa->dma.flush( rmesa );
-   }
-
-   if (rmesa->dma.current.buf)
-      radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
-
-   if (rmesa->dma.nr_released_bufs > 4)
-      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
-
-   dma.context = rmesa->dri.hwContext;
-   dma.send_count = 0;
-   dma.send_list = NULL;
-   dma.send_sizes = NULL;
-   dma.flags = 0;
-   dma.request_count = 1;
-   dma.request_size = RADEON_BUFFER_SIZE;
-   dma.request_list = &index;
-   dma.request_sizes = &size;
-   dma.granted_count = 0;
-
-   LOCK_HARDWARE(rmesa);	/* no need to validate */
-
-   ret = drmDMA( fd, &dma );
-      
-   if (ret != 0) {
-      /* Free some up this way?
-       */
-      if (rmesa->dma.nr_released_bufs) {
-	 radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
-      }
-      
-      if (RADEON_DEBUG & DEBUG_DMA)
-	 fprintf(stderr, "Waiting for buffers\n");
-
-      radeonWaitForIdleLocked( rmesa );
-      ret = drmDMA( fd, &dma );
-
-      if ( ret != 0 ) {
-	 UNLOCK_HARDWARE( rmesa );
-	 fprintf( stderr, "Error: Could not get dma buffer... exiting\n" );
-	 exit( -1 );
-      }
-   }
-
-   UNLOCK_HARDWARE(rmesa);
-
-   if (RADEON_DEBUG & DEBUG_DMA)
-      fprintf(stderr, "Allocated buffer %d\n", index);
-
-   dmabuf = CALLOC_STRUCT( radeon_dma_buffer );
-   dmabuf->buf = &rmesa->radeonScreen->buffers->list[index];
-   dmabuf->refcount = 1;
-
-   rmesa->dma.current.buf = dmabuf;
-   rmesa->dma.current.address = dmabuf->buf->address;
-   rmesa->dma.current.end = dmabuf->buf->total;
-   rmesa->dma.current.start = 0;
-   rmesa->dma.current.ptr = 0;
-
-   rmesa->c_vertexBuffers++;
-}
-
-void radeonReleaseDmaRegion( radeonContextPtr rmesa,
-			     struct radeon_dma_region *region,
-			     const char *caller )
-{
-   if (RADEON_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
-   
-   if (!region->buf)
-      return;
-
-   if (rmesa->dma.flush)
-      rmesa->dma.flush( rmesa );
-
-   if (--region->buf->refcount == 0) {
-      drm_radeon_cmd_header_t *cmd;
-
-      if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
-	 fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
-		 region->buf->buf->idx);  
-      
-      cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sizeof(*cmd), 
-						     __FUNCTION__ );
-      cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
-      cmd->dma.buf_idx = region->buf->buf->idx;
-      FREE(region->buf);
-      rmesa->dma.nr_released_bufs++;
-   }
-
-   region->buf = NULL;
-   region->start = 0;
-}
-
-/* Allocates a region from rmesa->dma.current.  If there isn't enough
- * space in current, grab a new buffer (and discard what was left of current)
- */
-void radeonAllocDmaRegion( radeonContextPtr rmesa, 
-			   struct radeon_dma_region *region,
-			   int bytes,
-			   int alignment )
-{
-   if (RADEON_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
-
-   if (rmesa->dma.flush)
-      rmesa->dma.flush( rmesa );
-
-   if (region->buf)
-      radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ );
-
-   alignment--;
-   rmesa->dma.current.start = rmesa->dma.current.ptr = 
-      (rmesa->dma.current.ptr + alignment) & ~alignment;
-
-   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
-      radeonRefillCurrentDmaRegion( rmesa );
-
-   region->start = rmesa->dma.current.start;
-   region->ptr = rmesa->dma.current.start;
-   region->end = rmesa->dma.current.start + bytes;
-   region->address = rmesa->dma.current.address;
-   region->buf = rmesa->dma.current.buf;
-   region->buf->refcount++;
-
-   rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
-   rmesa->dma.current.start = 
-      rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;  
-}
-
-/* ================================================================
- * SwapBuffers with client-side throttling
- */
-
-static uint32_t radeonGetLastFrame (radeonContextPtr rmesa) 
-{
-   drm_radeon_getparam_t gp;
-   int ret;
-   uint32_t frame;
-
-   gp.param = RADEON_PARAM_LAST_FRAME;
-   gp.value = (int *)&frame;
-   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
-			      &gp, sizeof(gp) );
-
-   if ( ret ) {
-      fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret );
-      exit(1);
-   }
-
-   return frame;
-}
-
-static void radeonEmitIrqLocked( radeonContextPtr rmesa )
-{
-   drm_radeon_irq_emit_t ie;
-   int ret;
-
-   ie.irq_seq = &rmesa->iw.irq_seq;
-   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, 
-			      &ie, sizeof(ie) );
-   if ( ret ) {
-      fprintf( stderr, "%s: drm_radeon_irq_emit_t: %d\n", __FUNCTION__, ret );
-      exit(1);
-   }
-}
-
-
-static void radeonWaitIrq( radeonContextPtr rmesa )
-{
-   int ret;
-
-   do {
-      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
-			     &rmesa->iw, sizeof(rmesa->iw) );
-   } while (ret && (errno == EINTR || errno == EBUSY));
-
-   if ( ret ) {
-      fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
-      exit(1);
-   }
-}
-
-
-static void radeonWaitForFrameCompletion( radeonContextPtr rmesa )
-{
-   drm_radeon_sarea_t *sarea = rmesa->sarea;
-
-   if (rmesa->do_irqs) {
-      if (radeonGetLastFrame(rmesa) < sarea->last_frame) {
-	 if (!rmesa->irqsEmitted) {
-	    while (radeonGetLastFrame (rmesa) < sarea->last_frame)
-	       ;
-	 }
-	 else {
-	    UNLOCK_HARDWARE( rmesa ); 
-	    radeonWaitIrq( rmesa );	
-	    LOCK_HARDWARE( rmesa ); 
-	 }
-	 rmesa->irqsEmitted = 10;
+      for (i = 0; i + 1 < nr; i += 2) {
+	 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+		   (rmesa->radeon.tcl.aos[i].stride << 8) |
+		   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+		   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+	 OUT_BATCH(voffset);
+	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+	 OUT_BATCH(voffset);
       }
 
-      if (rmesa->irqsEmitted) {
-	 radeonEmitIrqLocked( rmesa );
-	 rmesa->irqsEmitted--;
+      if (nr & 1) {
+	 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+		   (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+	 OUT_BATCH(voffset);
       }
-   } 
-   else {
-      while (radeonGetLastFrame (rmesa) < sarea->last_frame) {
-	 UNLOCK_HARDWARE( rmesa ); 
-	 if (rmesa->do_usleeps) 
-	    DO_USLEEP( 1 );
-	 LOCK_HARDWARE( rmesa ); 
+      for (i = 0; i + 1 < nr; i += 2) {
+	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+			       rmesa->radeon.tcl.aos[i+0].bo,
+			       RADEON_GEM_DOMAIN_GTT,
+			       0, 0);
+	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+			       rmesa->radeon.tcl.aos[i+1].bo,
+			       RADEON_GEM_DOMAIN_GTT,
+			       0, 0);
       }
-   }
-}
-
-/* Copy the back color buffer to the front color buffer.
- */
-void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
-		       const drm_clip_rect_t	  *rect)
-{
-   radeonContextPtr rmesa;
-   GLint nbox, i, ret;
-   GLboolean   missed_target;
-   int64_t ust;
-   __DRIscreenPrivate *psp;
-
-   assert(dPriv);
-   assert(dPriv->driContextPriv);
-   assert(dPriv->driContextPriv->driverPrivate);
-
-   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
-
-   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
-      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
-   }
-
-   RADEON_FIREVERTICES( rmesa );
-   LOCK_HARDWARE( rmesa );
-
-   /* Throttle the frame rate -- only allow one pending swap buffers
-    * request at a time.
-    */
-   radeonWaitForFrameCompletion( rmesa );
-   if (!rect)
-   {
-       UNLOCK_HARDWARE( rmesa );
-       driWaitForVBlank( dPriv, & missed_target );
-       LOCK_HARDWARE( rmesa );
-   }
-
-   nbox = dPriv->numClipRects; /* must be in locked region */
-
-   for ( i = 0 ; i < nbox ; ) {
-      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
-      drm_clip_rect_t *box = dPriv->pClipRects;
-      drm_clip_rect_t *b = rmesa->sarea->boxes;
-      GLint n = 0;
-
-      for ( ; i < nr ; i++ ) {
-
-	  *b = box[i];
-
-	  if (rect)
-	  {
-	      if (rect->x1 > b->x1)
-		  b->x1 = rect->x1;
-	      if (rect->y1 > b->y1)
-		  b->y1 = rect->y1;
-	      if (rect->x2 < b->x2)
-		  b->x2 = rect->x2;
-	      if (rect->y2 < b->y2)
-		  b->y2 = rect->y2;
-
-	      if (b->x1 >= b->x2 || b->y1 >= b->y2)
-		  continue;
-	  }
-
-	  b++;
-	  n++;
-      }
-      rmesa->sarea->nbox = n;
-
-      if (!n)
-	 continue;
-
-      ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
-
-      if ( ret ) {
-	 fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
-	 UNLOCK_HARDWARE( rmesa );
-	 exit( 1 );
+      if (nr & 1) {
+	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+			       rmesa->radeon.tcl.aos[nr-1].bo,
+			       RADEON_GEM_DOMAIN_GTT,
+			       0, 0);
       }
    }
+   END_BATCH();
 
-   UNLOCK_HARDWARE( rmesa );
-   if (!rect)
-   {
-       psp = dPriv->driScreenPriv;
-       rmesa->swap_count++;
-       (*psp->systemTime->getUST)( & ust );
-       if ( missed_target ) {
-	   rmesa->swap_missed_count++;
-	   rmesa->swap_missed_ust = ust - rmesa->swap_ust;
-       }
-
-       rmesa->swap_ust = ust;
-       rmesa->hw.all_dirty = GL_TRUE;
-   }
-}
-
-void radeonPageFlip( __DRIdrawablePrivate *dPriv )
-{
-   radeonContextPtr rmesa;
-   GLint ret;
-   GLboolean   missed_target;
-   __DRIscreenPrivate *psp;
-
-   assert(dPriv);
-   assert(dPriv->driContextPriv);
-   assert(dPriv->driContextPriv->driverPrivate);
-
-   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
-   psp = dPriv->driScreenPriv;
-
-   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
-      fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
-	      rmesa->sarea->pfCurrentPage);
-   }
-
-   RADEON_FIREVERTICES( rmesa );
-   LOCK_HARDWARE( rmesa );
-
-   /* Need to do this for the perf box placement:
-    */
-   if (dPriv->numClipRects)
-   {
-      drm_clip_rect_t *box = dPriv->pClipRects;
-      drm_clip_rect_t *b = rmesa->sarea->boxes;
-      b[0] = box[0];
-      rmesa->sarea->nbox = 1;
-   }
-
-   /* Throttle the frame rate -- only allow a few pending swap buffers
-    * request at a time.
-    */
-   radeonWaitForFrameCompletion( rmesa );
-   UNLOCK_HARDWARE( rmesa );
-   driWaitForVBlank( dPriv, & missed_target );
-   if ( missed_target ) {
-      rmesa->swap_missed_count++;
-      (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust );
-   }
-   LOCK_HARDWARE( rmesa );
-
-   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
-
-   UNLOCK_HARDWARE( rmesa );
-
-   if ( ret ) {
-      fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
-      exit( 1 );
-   }
-
-   rmesa->swap_count++;
-   (void) (*psp->systemTime->getUST)( & rmesa->swap_ust );
-
-   /* Get ready for drawing next frame.  Update the renderbuffers'
-    * flippedOffset/Pitch fields so we draw into the right place.
-    */
-   driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
-                        rmesa->sarea->pfCurrentPage);
-
-   radeonUpdateDrawBuffer(rmesa->glCtx);
+#endif
 }
 
-
 /* ================================================================
  * Buffer clear
  */
 #define RADEON_MAX_CLEARS	256
 
-static void radeonClear( GLcontext *ctx, GLbitfield mask )
+static void radeonKernelClear(GLcontext *ctx, GLuint flags)
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-   drm_radeon_sarea_t *sarea = rmesa->sarea;
+     r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
+   drm_radeon_sarea_t *sarea = rmesa->radeon.sarea;
    uint32_t clear;
-   GLuint flags = 0;
-   GLuint color_mask = 0;
    GLint ret, i;
    GLint cx, cy, cw, ch;
 
-   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
-      fprintf( stderr, "radeonClear\n");
-   }
-
-   {
-      LOCK_HARDWARE( rmesa );
-      UNLOCK_HARDWARE( rmesa );
-      if ( dPriv->numClipRects == 0 ) 
-	 return;
-   }
-   
-   radeonFlush( ctx ); 
-
-   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
-      flags |= RADEON_FRONT;
-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
-      mask &= ~BUFFER_BIT_FRONT_LEFT;
-   }
-
-   if ( mask & BUFFER_BIT_BACK_LEFT ) {
-      flags |= RADEON_BACK;
-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
-      mask &= ~BUFFER_BIT_BACK_LEFT;
-   }
-
-   if ( mask & BUFFER_BIT_DEPTH ) {
-      flags |= RADEON_DEPTH;
-      mask &= ~BUFFER_BIT_DEPTH;
-   }
-
-   if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) {
-      flags |= RADEON_STENCIL;
-      mask &= ~BUFFER_BIT_STENCIL;
-   }
-
-   if ( mask ) {
-      if (RADEON_DEBUG & DEBUG_FALLBACKS)
-	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
-      _swrast_Clear( ctx, mask );
-   }
-
-   if ( !flags ) 
-      return;
-
-   if (rmesa->using_hyperz) {
-      flags |= RADEON_USE_COMP_ZBUF;
-/*      if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) 
-         flags |= RADEON_USE_HIERZ; */
-      if (!(rmesa->state.stencil.hwBuffer) ||
-	 ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
-	    ((rmesa->state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
-	  flags |= RADEON_CLEAR_FASTZ;
-      }
-   }
-
-   LOCK_HARDWARE( rmesa );
+   LOCK_HARDWARE( &rmesa->radeon );
 
    /* compute region after locking: */
    cx = ctx->DrawBuffer->_Xmin;
@@ -1112,7 +475,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
 
       gp.param = RADEON_PARAM_LAST_CLEAR;
       gp.value = (int *)&clear;
-      ret = drmCommandWriteRead( rmesa->dri.fd,
+      ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
 				 DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
 
       if ( ret ) {
@@ -1124,20 +487,20 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
 	 break;
       }
 
-      if ( rmesa->do_usleeps ) {
-	 UNLOCK_HARDWARE( rmesa );
+      if ( rmesa->radeon.do_usleeps ) {
+	 UNLOCK_HARDWARE( &rmesa->radeon );
 	 DO_USLEEP( 1 );
-	 LOCK_HARDWARE( rmesa );
+	 LOCK_HARDWARE( &rmesa->radeon );
       }
    }
 
    /* Send current state to the hardware */
-   radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
+   rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
 
    for ( i = 0 ; i < dPriv->numClipRects ; ) {
       GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
       drm_clip_rect_t *box = dPriv->pClipRects;
-      drm_clip_rect_t *b = rmesa->sarea->boxes;
+      drm_clip_rect_t *b = rmesa->radeon.sarea->boxes;
       drm_radeon_clear_t clear;
       drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
       GLint n = 0;
@@ -1172,105 +535,107 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
 	 }
       }
 
-      rmesa->sarea->nbox = n;
+      rmesa->radeon.sarea->nbox = n;
 
       clear.flags       = flags;
-      clear.clear_color = rmesa->state.color.clear;
-      clear.clear_depth = rmesa->state.depth.clear;
+      clear.clear_color = rmesa->radeon.state.color.clear;
+      clear.clear_depth = rmesa->radeon.state.depth.clear;
       clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
-      clear.depth_mask  = rmesa->state.stencil.clear;
+      clear.depth_mask  = rmesa->radeon.state.stencil.clear;
       clear.depth_boxes = depth_boxes;
 
       n--;
-      b = rmesa->sarea->boxes;
+      b = rmesa->radeon.sarea->boxes;
       for ( ; n >= 0 ; n-- ) {
 	 depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
 	 depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
 	 depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2;
 	 depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2;
-	 depth_boxes[n].f[CLEAR_DEPTH] = 
-	    (float)rmesa->state.depth.clear;
+	 depth_boxes[n].f[CLEAR_DEPTH] =
+	    (float)rmesa->radeon.state.depth.clear;
       }
 
-      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
+      ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR,
 			     &clear, sizeof(drm_radeon_clear_t));
 
       if ( ret ) {
-	 UNLOCK_HARDWARE( rmesa );
+	 UNLOCK_HARDWARE( &rmesa->radeon );
 	 fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
 	 exit( 1 );
       }
    }
-
-   UNLOCK_HARDWARE( rmesa );
-   rmesa->hw.all_dirty = GL_TRUE;
+   UNLOCK_HARDWARE( &rmesa->radeon );
 }
 
-
-void radeonWaitForIdleLocked( radeonContextPtr rmesa )
+static void radeonClear( GLcontext *ctx, GLbitfield mask )
 {
-    int fd = rmesa->dri.fd;
-    int to = 0;
-    int ret, i = 0;
-
-    rmesa->c_drawWaits++;
-
-    do {
-        do {
-            ret = drmCommandNone( fd, DRM_RADEON_CP_IDLE);
-        } while ( ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY );
-    } while ( ( ret == -EBUSY ) && ( to++ < RADEON_TIMEOUT ) );
-
-    if ( ret < 0 ) {
-	UNLOCK_HARDWARE( rmesa );
-	fprintf( stderr, "Error: Radeon timed out... exiting\n" );
-	exit( -1 );
-    }
-}
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
+   GLuint flags = 0;
+   GLuint color_mask = 0;
+   GLuint orig_mask = mask;
 
+   if ( RADEON_DEBUG & RADEON_IOCTL ) {
+      fprintf( stderr, "radeonClear\n");
+   }
 
-static void radeonWaitForIdle( radeonContextPtr rmesa )
-{
-   LOCK_HARDWARE(rmesa);
-   radeonWaitForIdleLocked( rmesa );
-   UNLOCK_HARDWARE(rmesa);
-}
+   {
+      LOCK_HARDWARE( &rmesa->radeon );
+      UNLOCK_HARDWARE( &rmesa->radeon );
+      if ( dPriv->numClipRects == 0 )
+	 return;
+   }
 
+   radeon_firevertices(&rmesa->radeon);
 
-void radeonFlush( GLcontext *ctx )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
+      flags |= RADEON_FRONT;
+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+      mask &= ~BUFFER_BIT_FRONT_LEFT;
+   }
 
-   if (RADEON_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
+   if ( mask & BUFFER_BIT_BACK_LEFT ) {
+      flags |= RADEON_BACK;
+      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+      mask &= ~BUFFER_BIT_BACK_LEFT;
+   }
 
-   if (rmesa->dma.flush)
-      rmesa->dma.flush( rmesa );
+   if ( mask & BUFFER_BIT_DEPTH ) {
+      flags |= RADEON_DEPTH;
+      mask &= ~BUFFER_BIT_DEPTH;
+   }
 
-   radeonEmitState( rmesa );
-   
-   if (rmesa->store.cmd_used)
-      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
-}
+   if ( (mask & BUFFER_BIT_STENCIL) ) {
+      flags |= RADEON_STENCIL;
+      mask &= ~BUFFER_BIT_STENCIL;
+   }
 
-/* Make sure all commands have been sent to the hardware and have
- * completed processing.
- */
-void radeonFinish( GLcontext *ctx )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   radeonFlush( ctx );
-
-   if (rmesa->do_irqs) {
-      LOCK_HARDWARE( rmesa );
-      radeonEmitIrqLocked( rmesa );
-      UNLOCK_HARDWARE( rmesa );
-      radeonWaitIrq( rmesa );
+   if ( mask ) {
+      if (RADEON_DEBUG & RADEON_FALLBACKS)
+	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
+      _swrast_Clear( ctx, mask );
    }
-   else
-      radeonWaitForIdle( rmesa );
-}
 
+   if ( !flags )
+      return;
+
+   if (rmesa->using_hyperz) {
+      flags |= RADEON_USE_COMP_ZBUF;
+/*      if (rmesa->radeon.radeonScreen->chipset & RADEON_CHIPSET_TCL)
+         flags |= RADEON_USE_HIERZ; */
+      if (((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
+	    ((rmesa->radeon.state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
+	  flags |= RADEON_CLEAR_FASTZ;
+      }
+   }
+
+   if (rmesa->radeon.radeonScreen->kernel_mm)
+     radeonUserClear(ctx, orig_mask);
+   else {
+      radeonKernelClear(ctx, flags);
+      rmesa->radeon.hw.all_dirty = GL_TRUE;
+   }
+}
 
 void radeonInitIoctlFuncs( GLcontext *ctx )
 {
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
index 4e3a44df075..deb53ae3138 100644
--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.h
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
@@ -38,31 +38,32 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "main/simple_list.h"
 #include "radeon_lock.h"
+#include "radeon_bocs_wrapper.h"
 
-
-extern void radeonEmitState( radeonContextPtr rmesa );
-extern void radeonEmitVertexAOS( radeonContextPtr rmesa,
+extern void radeonEmitVertexAOS( r100ContextPtr rmesa,
 				 GLuint vertex_size,
+				 struct radeon_bo *bo,
 				 GLuint offset );
 
-extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
+extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
 				GLuint vertex_format,
 				GLuint primitive,
 				GLuint vertex_nr );
 
-extern void radeonFlushElts( radeonContextPtr rmesa );
+extern void radeonFlushElts( GLcontext *ctx );
+			    
 
-extern GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
+extern GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
 					   GLuint vertex_format,
 					   GLuint primitive,
 					   GLuint min_nr );
 
-extern void radeonEmitAOS( radeonContextPtr rmesa,
-			   struct radeon_dma_region **regions,
+
+extern void radeonEmitAOS( r100ContextPtr rmesa,
 			   GLuint n,
 			   GLuint offset );
 
-extern void radeonEmitBlit( radeonContextPtr rmesa,
+extern void radeonEmitBlit( r100ContextPtr rmesa,
 			    GLuint color_fmt,
 			    GLuint src_pitch,
 			    GLuint src_offset,
@@ -72,30 +73,15 @@ extern void radeonEmitBlit( radeonContextPtr rmesa,
 			    GLint dstx, GLint dsty,
 			    GLuint w, GLuint h );
 
-extern void radeonEmitWait( radeonContextPtr rmesa, GLuint flags );
-
-extern void radeonFlushCmdBuf( radeonContextPtr rmesa, const char * );
-extern void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa );
+extern void radeonEmitWait( r100ContextPtr rmesa, GLuint flags );
 
-extern void radeonAllocDmaRegion( radeonContextPtr rmesa,
-				  struct radeon_dma_region *region,
-				  int bytes, 
-				  int alignment );
+extern void radeonFlushCmdBuf( r100ContextPtr rmesa, const char * );
 
-extern void radeonReleaseDmaRegion( radeonContextPtr rmesa,
-				    struct radeon_dma_region *region,
-				    const char *caller );
-
-extern void radeonCopyBuffer( __DRIdrawablePrivate *drawable,
-			      const drm_clip_rect_t	 *rect);
-extern void radeonPageFlip( __DRIdrawablePrivate *drawable );
 extern void radeonFlush( GLcontext *ctx );
 extern void radeonFinish( GLcontext *ctx );
-extern void radeonWaitForIdleLocked( radeonContextPtr rmesa );
-extern void radeonWaitForVBlank( radeonContextPtr rmesa );
 extern void radeonInitIoctlFuncs( GLcontext *ctx );
-extern void radeonGetAllParams( radeonContextPtr rmesa );
-extern void radeonSetUpAtomList( radeonContextPtr rmesa );
+extern void radeonGetAllParams( r100ContextPtr rmesa );
+extern void radeonSetUpAtomList( r100ContextPtr rmesa );
 
 /* ================================================================
  * Helper macros:
@@ -105,33 +91,33 @@ extern void radeonSetUpAtomList( radeonContextPtr rmesa );
  */
 #define RADEON_NEWPRIM( rmesa )			\
 do {						\
-   if ( rmesa->dma.flush )			\
-      rmesa->dma.flush( rmesa );	\
+   if ( rmesa->radeon.dma.flush )			\
+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	\
 } while (0)
 
 /* Can accomodate several state changes and primitive changes without
  * actually firing the buffer.
  */
+
 #define RADEON_STATECHANGE( rmesa, ATOM )			\
 do {								\
    RADEON_NEWPRIM( rmesa );					\
    rmesa->hw.ATOM.dirty = GL_TRUE;				\
-   rmesa->hw.is_dirty = GL_TRUE;				\
+   rmesa->radeon.hw.is_dirty = GL_TRUE;				\
 } while (0)
 
-#define RADEON_DB_STATE( ATOM )			        \
+#define RADEON_DB_STATE( ATOM )				\
    memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd,	\
 	   rmesa->hw.ATOM.cmd_size * 4)
 
-static INLINE int RADEON_DB_STATECHANGE( 
-   radeonContextPtr rmesa,
-   struct radeon_state_atom *atom )
+static INLINE int RADEON_DB_STATECHANGE(r100ContextPtr rmesa,
+					struct radeon_state_atom *atom )
 {
    if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
-      int *tmp;
+      GLuint *tmp;
       RADEON_NEWPRIM( rmesa );
       atom->dirty = GL_TRUE;
-      rmesa->hw.is_dirty = GL_TRUE;
+      rmesa->radeon.hw.is_dirty = GL_TRUE;
       tmp = atom->cmd; 
       atom->cmd = atom->lastcmd;
       atom->lastcmd = tmp;
@@ -141,62 +127,55 @@ static INLINE int RADEON_DB_STATECHANGE(
       return 0;
 }
 
-
-/* Fire the buffered vertices no matter what.
- */
-#define RADEON_FIREVERTICES( rmesa )			\
-do {							\
-   if ( rmesa->store.cmd_used || rmesa->dma.flush ) {	\
-      radeonFlush( rmesa->glCtx );			\
-   }							\
-} while (0)
-
 /* Command lengths.  Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
  * are available, you will also be adding an rmesa->state.max_state_size because
  * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
  */
 #if RADEON_OLD_PACKETS
-#define AOS_BUFSZ(nr)	((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int))
+#define AOS_BUFSZ(nr)	((3 + ((nr / 2) * 3) + ((nr & 1) * 2))+nr*2)
 #define VERT_AOS_BUFSZ	(0)
 #define ELTS_BUFSZ(nr)	(24 + nr * 2)
-#define VBUF_BUFSZ	(6 * sizeof(int))
+#define VBUF_BUFSZ	(8)
 #else
-#define AOS_BUFSZ(nr)	((3 + ((nr / 2) * 3) + ((nr & 1) * 2)) * sizeof(int))
-#define VERT_AOS_BUFSZ	(5 * sizeof(int))
+#define AOS_BUFSZ(nr)	((3 + ((nr / 2) * 3) + ((nr & 1) * 2) + nr*2))
+#define VERT_AOS_BUFSZ	(5)
 #define ELTS_BUFSZ(nr)	(16 + nr * 2)
-#define VBUF_BUFSZ	(4 * sizeof(int))
+#define VBUF_BUFSZ	(4)
 #endif
+#define SCISSOR_BUFSZ	(8)
+#define INDEX_BUFSZ	(7)
 
-/* Ensure that a minimum amount of space is available in the command buffer.
- * This is used to ensure atomicity of state updates with the rendering requests
- * that rely on them.
- *
- * An alternative would be to implement a "soft lock" such that when the buffer
- * wraps at an inopportune time, we grab the lock, flush the current buffer,
- * and hang on to the lock until the critical section is finished and we flush
- * the buffer again and unlock.
- */
-static INLINE void radeonEnsureCmdBufSpace( radeonContextPtr rmesa,
-					      int bytes )
-{
-   if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
-      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
-   assert( bytes <= RADEON_CMD_BUF_SZ );
-}
 
-/* Alloc space in the command buffer
- */
-static INLINE char *radeonAllocCmdBuf( radeonContextPtr rmesa,
-					 int bytes, const char *where )
+static inline uint32_t cmdpacket3(int cmd_type)
 {
-   if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
-      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+  drm_radeon_cmd_header_t cmd;
+
+  cmd.i = 0;
+  cmd.header.cmd_type = cmd_type;
+
+  return (uint32_t)cmd.i;
 
-   {
-      char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
-      rmesa->store.cmd_used += bytes;
-      return head;
-   }
 }
 
+#define OUT_BATCH_PACKET3(packet, num_extra) do {	      \
+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3));				      \
+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
+    } else {						      \
+      OUT_BATCH(CP_PACKET2);				      \
+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
+    }							      \
+  } while(0)
+
+#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do {	      \
+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP));	      \
+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
+    } else {						      \
+      OUT_BATCH(CP_PACKET2);				      \
+      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
+    }							      \
+  } while(0)
+
+
 #endif /* __RADEON_IOCTL_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_lighting.c b/src/mesa/drivers/dri/radeon/radeon_lighting.c
index ac3b94e4a68..ba444f2b100 100644
--- a/src/mesa/drivers/dri/radeon/radeon_lighting.c
+++ b/src/mesa/drivers/dri/radeon/radeon_lighting.c
@@ -195,7 +195,7 @@ void radeonUpdateMaterial( GLcontext *ctx )
    if (ctx->Light.ColorMaterialEnabled)
       mask &= ~ctx->Light.ColorMaterialBitmask;
 
-   if (RADEON_DEBUG & DEBUG_STATE)
+   if (RADEON_DEBUG & RADEON_STATE)
       fprintf(stderr, "%s\n", __FUNCTION__);
 
       
@@ -234,7 +234,7 @@ void radeonUpdateMaterial( GLcontext *ctx )
       check_twoside_fallback( ctx );
       update_global_ambient( ctx );
    }
-   else if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_STATE))
+   else if (RADEON_DEBUG & (RADEON_PRIMS|DEBUG_STATE))
       fprintf(stderr, "%s: Elided noop material call\n", __FUNCTION__);
 }
 
@@ -624,7 +624,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx )
    GLboolean tmp;
    RADEON_STATECHANGE( rmesa, tcl );
 
-   if (RADEON_DEBUG & DEBUG_STATE)
+   if (RADEON_DEBUG & RADEON_STATE)
       fprintf(stderr, "%s %d\n", __FUNCTION__, ctx->_NeedEyeCoords);
 
    if (ctx->_NeedEyeCoords)
diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c
index 64bb3ca103f..02de8e5fd1d 100644
--- a/src/mesa/drivers/dri/radeon/radeon_lock.c
+++ b/src/mesa/drivers/dri/radeon/radeon_lock.c
@@ -41,30 +41,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "main/glheader.h"
 #include "main/mtypes.h"
-#include "radeon_context.h"
+#include "main/colormac.h"
+#include "dri_util.h"
+#include "radeon_screen.h"
+#include "radeon_common.h"
 #include "radeon_lock.h"
-#include "radeon_tex.h"
-#include "radeon_state.h"
-#include "radeon_ioctl.h"
-
 #include "drirenderbuffer.h"
 
-#if DEBUG_LOCKING
-char *prevLockFile = NULL;
-int prevLockLine = 0;
-#endif
-
-/* Turn on/off page flipping according to the flags in the sarea:
- */
-static void radeonUpdatePageFlipping(radeonContextPtr rmesa)
-{
-	rmesa->doPageFlip = rmesa->sarea->pfState;
-	if (rmesa->glCtx->WinSysDrawBuffer) {
-		driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
-				     rmesa->sarea->pfCurrentPage);
-	}
-}
-
 /* Update the hardware state.  This is called if another context has
  * grabbed the hardware lock, which includes the X server.  This
  * function also updates the driver's window state after the X server
@@ -75,10 +58,11 @@ static void radeonUpdatePageFlipping(radeonContextPtr rmesa)
  */
 void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
 {
-	__DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
-	__DRIdrawablePrivate *const readable = rmesa->dri.readable;
+	__DRIdrawablePrivate *const drawable = radeon_get_drawable(rmesa);
+	__DRIdrawablePrivate *const readable = radeon_get_readable(rmesa);
 	__DRIscreenPrivate *sPriv = rmesa->dri.screen;
-	drm_radeon_sarea_t *sarea = rmesa->sarea;
+
+	assert(drawable != NULL);
 
 	drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
 
@@ -96,29 +80,89 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
 	}
 
 	if (rmesa->lastStamp != drawable->lastStamp) {
-		radeonUpdatePageFlipping(rmesa);
-		radeonSetCliprects(rmesa);
-		radeonUpdateViewportOffset(rmesa->glCtx);
-		driUpdateFramebufferSize(rmesa->glCtx, drawable);
+		radeon_window_moved(rmesa);
+		rmesa->lastStamp = drawable->lastStamp;
 	}
 
-	RADEON_STATECHANGE(rmesa, ctx);
-	if (rmesa->sarea->tiling_enabled) {
-		rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
-		    RADEON_COLOR_TILE_ENABLE;
-	} else {
-		rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &=
-		    ~RADEON_COLOR_TILE_ENABLE;
-	}
+	rmesa->vtbl.get_lock(rmesa);
+}
+#ifndef NDEBUG
+struct lock_debug {
+	const char* function;
+	const char* file;
+	int line;
+};
+
+static struct lock_debug ldebug = {0};
+#endif
+
+#if 0
+/** TODO: use atomic operations for reference counting **/
+/** gcc 4.2 has builtin functios for this **/
+#define ATOMIC_INC_AND_FETCH(atomic) __sync_add_and_fetch(&atomic, 1)
+#define ATOMIC_DEC_AND_FETCH(atomic) __sync_sub_and_fetch(&atomic, 1)
+#else
+#define ATOMIC_INC_AND_FETCH(atomic) (++atomic)
+#define ATOMIC_DEC_AND_FETCH(atomic) (--atomic)
+#endif
+
 
-	if (sarea->ctx_owner != rmesa->dri.hwContext) {
-		int i;
-		sarea->ctx_owner = rmesa->dri.hwContext;
+void radeon_lock_hardware(radeonContextPtr radeon
+#ifndef NDEBUG
+		,const char* function
+		,const char* file
+		,const int line
+#endif
+		)
+{
+	char ret = 0;
+	struct radeon_framebuffer *rfb = NULL;
+	struct radeon_renderbuffer *rrb = NULL;
+
+	if (radeon_get_drawable(radeon)) {
+		rfb = radeon_get_drawable(radeon)->driverPrivate;
+
+		if (rfb)
+			rrb = radeon_get_renderbuffer(&rfb->base,
+						      rfb->base._ColorDrawBufferIndexes[0]);
+	}
 
-		for (i = 0; i < rmesa->nr_heaps; i++) {
-			DRI_AGE_TEXTURES(rmesa->texture_heaps[i]);
+	if (!radeon->radeonScreen->driScreen->dri2.enabled) {
+		if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1)
+		{
+#ifndef NDEBUG
+			if ( RADEON_DEBUG & RADEON_SANITY )
+				fprintf(stderr, "*** %d times of recursive call to %s ***\n"
+						"Original call was from %s (file: %s line: %d)\n"
+						"Now call is coming from %s (file: %s line: %d)\n"
+						, radeon->dri.hwLockCount, __FUNCTION__
+						, ldebug.function, ldebug.file, ldebug.line
+						, function, file, line
+					   );
+#endif
+			return;
 		}
+		DRM_CAS(radeon->dri.hwLock, radeon->dri.hwContext,
+			 (DRM_LOCK_HELD | radeon->dri.hwContext), ret );
+		if (ret)
+			radeonGetLock(radeon, 0);
+#ifndef NDEBUG
+		ldebug.function = function;
+		ldebug.file = file;
+		ldebug.line = line;
+#endif
 	}
+}
 
-	rmesa->lost_context = GL_TRUE;
+void radeon_unlock_hardware(radeonContextPtr radeon)
+{
+	if (!radeon->radeonScreen->driScreen->dri2.enabled) {
+		if (ATOMIC_DEC_AND_FETCH(radeon->dri.hwLockCount) > 0)
+		{
+			return;
+		}
+		DRM_UNLOCK( radeon->dri.fd,
+			    radeon->dri.hwLock,
+			    radeon->dri.hwContext );
+	}
 }
diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.h b/src/mesa/drivers/dri/radeon/radeon_lock.h
index 86e96aa7d2c..da5a5b43715 100644
--- a/src/mesa/drivers/dri/radeon/radeon_lock.h
+++ b/src/mesa/drivers/dri/radeon/radeon_lock.h
@@ -39,74 +39,31 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  *   Kevin E. Martin <martin@valinux.com>
  */
 
-#ifndef __RADEON_LOCK_H__
-#define __RADEON_LOCK_H__
+#ifndef COMMON_LOCK_H
+#define COMMON_LOCK_H
 
-extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
-
-/* Turn DEBUG_LOCKING on to find locking conflicts.
- */
-#define DEBUG_LOCKING	0
-
-#if DEBUG_LOCKING
-extern char *prevLockFile;
-extern int prevLockLine;
-
-#define DEBUG_LOCK()							\
-   do {									\
-      prevLockFile = (__FILE__);					\
-      prevLockLine = (__LINE__);					\
-   } while (0)
-
-#define DEBUG_RESET()							\
-   do {									\
-      prevLockFile = 0;							\
-      prevLockLine = 0;							\
-   } while (0)
-
-#define DEBUG_CHECK_LOCK()						\
-   do {									\
-      if ( prevLockFile ) {						\
-	 fprintf( stderr,						\
-		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
-		  prevLockFile, prevLockLine, __FILE__, __LINE__ );	\
-	 exit( 1 );							\
-      }									\
-   } while (0)
-
-#else
+#include "main/colormac.h"
+#include "radeon_screen.h"
+#include "radeon_common.h"
 
-#define DEBUG_LOCK()
-#define DEBUG_RESET()
-#define DEBUG_CHECK_LOCK()
+extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
 
+void radeon_lock_hardware(radeonContextPtr rmesa
+#ifndef NDEBUG
+		,const char* function
+		,const char* file
+		,const int line
 #endif
-
-/*
- * !!! We may want to separate locks from locks with validation.  This
- * could be used to improve performance for those things commands that
- * do not do any drawing !!!
- */
+		);
+void radeon_unlock_hardware(radeonContextPtr rmesa);
 
 /* Lock the hardware and validate our state.
  */
-#define LOCK_HARDWARE( rmesa )					\
-   do {								\
-      char __ret = 0;						\
-      DEBUG_CHECK_LOCK();					\
-      DRM_CAS( (rmesa)->dri.hwLock, (rmesa)->dri.hwContext,		\
-	       (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret );	\
-      if ( __ret )						\
-	 radeonGetLock( (rmesa), 0 );				\
-      DEBUG_LOCK();						\
-   } while (0)
-
-#define UNLOCK_HARDWARE( rmesa )					\
-   do {									\
-      DRM_UNLOCK( (rmesa)->dri.fd,					\
-		  (rmesa)->dri.hwLock,					\
-		  (rmesa)->dri.hwContext );				\
-      DEBUG_RESET();							\
-   } while (0)
+#ifdef NDEBUG
+#define LOCK_HARDWARE( rmesa )	radeon_lock_hardware(rmesa)
+#else
+#define LOCK_HARDWARE( rmesa )	radeon_lock_hardware(rmesa, __FUNCTION__, __FILE__, __LINE__)
+#endif
+#define UNLOCK_HARDWARE( rmesa )  radeon_unlock_hardware(rmesa)
 
-#endif				/* __RADEON_LOCK_H__ */
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos.h b/src/mesa/drivers/dri/radeon/radeon_maos.h
index b8935e84a05..b88eb198d57 100644
--- a/src/mesa/drivers/dri/radeon/radeon_maos.h
+++ b/src/mesa/drivers/dri/radeon/radeon_maos.h
@@ -38,6 +38,5 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_context.h"
 
 extern void radeonEmitArrays( GLcontext *ctx, GLuint inputs );
-extern void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs );
 
 #endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
index 31eea13f4ef..08e1c5d00df 100644
--- a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
@@ -48,160 +48,35 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_maos.h"
 #include "radeon_tcl.h"
 
-#if 0
-/* Usage:
- *   - from radeon_tcl_render
- *   - call radeonEmitArrays to ensure uptodate arrays in dma
- *   - emit primitives (new type?) which reference the data
- *       -- need to use elts for lineloop, quads, quadstrip/flat
- *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
- *
- */
-static void emit_ubyte_rgba3( GLcontext *ctx,
-		       struct radeon_dma_region *rvb,
-		       char *data,
-		       int stride,
-		       int count )
-{
-   int i;
-   radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address);
-
-   if (RADEON_DEBUG & DEBUG_VERTS)
-      fprintf(stderr, "%s count %d stride %d out %p\n",
-	      __FUNCTION__, count, stride, (void *)out);
-
-   for (i = 0; i < count; i++) {
-      out->red   = *data;
-      out->green = *(data+1);
-      out->blue  = *(data+2);
-      out->alpha = 0xFF;
-      out++;
-      data += stride;
-   }
-}
-
-static void emit_ubyte_rgba4( GLcontext *ctx,
-			      struct radeon_dma_region *rvb,
-			      char *data,
-			      int stride,
-			      int count )
+static void emit_vecfog(GLcontext *ctx, struct radeon_aos *aos,
+			GLvoid *data, int stride, int count)
 {
    int i;
-   int *out = (int *)(rvb->address + rvb->start);
+   uint32_t *out;
+   int size = 1;
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   if (RADEON_DEBUG & DEBUG_VERTS)
+   if (RADEON_DEBUG & RADEON_VERTS)
       fprintf(stderr, "%s count %d stride %d\n",
 	      __FUNCTION__, count, stride);
 
-   if (stride == 4)
-       COPY_DWORDS( out, data, count );
-   else
-      for (i = 0; i < count; i++) {
-	 *out++ = LE32_TO_CPU(*(int *)data);
-	 data += stride;
-      }
-}
-
-
-static void emit_ubyte_rgba( GLcontext *ctx,
-			     struct radeon_dma_region *rvb,
-			     char *data,
-			     int size,
-			     int stride,
-			     int count )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   if (RADEON_DEBUG & DEBUG_VERTS)
-      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
-
-   assert (!rvb->buf);
-
    if (stride == 0) {
-      radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
+      radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 );
       count = 1;
-      rvb->aos_start = GET_START(rvb);
-      rvb->aos_stride = 0;
-      rvb->aos_size = 1;
+      aos->stride = 0;
    }
    else {
-      radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 );	/* alignment? */
-      rvb->aos_start = GET_START(rvb);
-      rvb->aos_stride = 1;
-      rvb->aos_size = 1;
-   }
-
-   /* Emit the data
-    */
-   switch (size) {
-   case 3:
-      emit_ubyte_rgba3( ctx, rvb, data, stride, count );
-      break;
-   case 4:
-      emit_ubyte_rgba4( ctx, rvb, data, stride, count );
-      break;
-   default:
-      assert(0);
-      exit(1);
-      break;
+      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
+      aos->stride = size;
    }
-}
-#endif
-
-#if defined(USE_X86_ASM)
-#define COPY_DWORDS( dst, src, nr )					\
-do {									\
-	int __tmp;							\
-	__asm__ __volatile__( "rep ; movsl"				\
-			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
-			      : "0" (nr),				\
-			        "D" ((long)dst),			\
-			        "S" ((long)src) );			\
-} while (0)
-#else
-#define COPY_DWORDS( dst, src, nr )		\
-do {						\
-   int j;					\
-   for ( j = 0 ; j < nr ; j++ )			\
-      dst[j] = ((int *)src)[j];			\
-   dst += nr;					\
-} while (0)
-#endif
-
-static void emit_vecfog( GLcontext *ctx,
-			 struct radeon_dma_region *rvb,
-			 char *data,
-			 int stride,
-			 int count )
-{
-   int i;
-   GLfloat *out;
-
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
-   if (RADEON_DEBUG & DEBUG_VERTS)
-      fprintf(stderr, "%s count %d stride %d\n",
-	      __FUNCTION__, count, stride);
+   aos->components = size;
+   aos->count = count;
 
-   assert (!rvb->buf);
-
-   if (stride == 0) {
-      radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
-      count = 1;
-      rvb->aos_start = GET_START(rvb);
-      rvb->aos_stride = 0;
-      rvb->aos_size = 1;
-   }
-   else {
-      radeonAllocDmaRegion( rmesa, rvb, count * 4, 4 );	/* alignment? */
-      rvb->aos_start = GET_START(rvb);
-      rvb->aos_stride = 1;
-      rvb->aos_size = 1;
-   }
 
    /* Emit the data
     */
-   out = (GLfloat *)(rvb->address + rvb->start);
+   out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
    for (i = 0; i < count; i++) {
       out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
       out++;
@@ -209,170 +84,10 @@ static void emit_vecfog( GLcontext *ctx,
    }
 }
 
-static void emit_vec4( GLcontext *ctx,
-		       struct radeon_dma_region *rvb,
-		       char *data,
-		       int stride,
-		       int count )
-{
-   int i;
-   int *out = (int *)(rvb->address + rvb->start);
-
-   if (RADEON_DEBUG & DEBUG_VERTS)
-      fprintf(stderr, "%s count %d stride %d\n",
-	      __FUNCTION__, count, stride);
-
-   if (stride == 4)
-      COPY_DWORDS( out, data, count );
-   else
-      for (i = 0; i < count; i++) {
-	 out[0] = *(int *)data;
-	 out++;
-	 data += stride;
-      }
-}
-
-
-static void emit_vec8( GLcontext *ctx,
-		       struct radeon_dma_region *rvb,
-		       char *data,
-		       int stride,
-		       int count )
-{
-   int i;
-   int *out = (int *)(rvb->address + rvb->start);
-
-   if (RADEON_DEBUG & DEBUG_VERTS)
-      fprintf(stderr, "%s count %d stride %d\n",
-	      __FUNCTION__, count, stride);
-
-   if (stride == 8)
-      COPY_DWORDS( out, data, count*2 );
-   else
-      for (i = 0; i < count; i++) {
-	 out[0] = *(int *)data;
-	 out[1] = *(int *)(data+4);
-	 out += 2;
-	 data += stride;
-      }
-}
-
-static void emit_vec12( GLcontext *ctx,
-		       struct radeon_dma_region *rvb,
-		       char *data,
-		       int stride,
-		       int count )
-{
-   int i;
-   int *out = (int *)(rvb->address + rvb->start);
-
-   if (RADEON_DEBUG & DEBUG_VERTS)
-      fprintf(stderr, "%s count %d stride %d out %p data %p\n",
-	      __FUNCTION__, count, stride, (void *)out, (void *)data);
-
-   if (stride == 12)
-      COPY_DWORDS( out, data, count*3 );
-   else
-      for (i = 0; i < count; i++) {
-	 out[0] = *(int *)data;
-	 out[1] = *(int *)(data+4);
-	 out[2] = *(int *)(data+8);
-	 out += 3;
-	 data += stride;
-      }
-}
-
-static void emit_vec16( GLcontext *ctx,
-			struct radeon_dma_region *rvb,
-			char *data,
-			int stride,
-			int count )
-{
-   int i;
-   int *out = (int *)(rvb->address + rvb->start);
-
-   if (RADEON_DEBUG & DEBUG_VERTS)
-      fprintf(stderr, "%s count %d stride %d\n",
-	      __FUNCTION__, count, stride);
-
-   if (stride == 16)
-      COPY_DWORDS( out, data, count*4 );
-   else
-      for (i = 0; i < count; i++) {
-	 out[0] = *(int *)data;
-	 out[1] = *(int *)(data+4);
-	 out[2] = *(int *)(data+8);
-	 out[3] = *(int *)(data+12);
-	 out += 4;
-	 data += stride;
-      }
-}
-
-
-static void emit_vector( GLcontext *ctx,
-			 struct radeon_dma_region *rvb,
-			 char *data,
-			 int size,
-			 int stride,
-			 int count )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   if (RADEON_DEBUG & DEBUG_VERTS)
-      fprintf(stderr, "%s count %d size %d stride %d\n",
-	      __FUNCTION__, count, size, stride);
-
-   assert (!rvb->buf);
-
-   if (stride == 0) {
-      radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
-      count = 1;
-      rvb->aos_start = GET_START(rvb);
-      rvb->aos_stride = 0;
-      rvb->aos_size = size;
-   }
-   else {
-      radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 );	/* alignment? */
-      rvb->aos_start = GET_START(rvb);
-      rvb->aos_stride = size;
-      rvb->aos_size = size;
-   }
-
-   /* Emit the data
-    */
-   switch (size) {
-   case 1:
-      emit_vec4( ctx, rvb, data, stride, count );
-      break;
-   case 2:
-      emit_vec8( ctx, rvb, data, stride, count );
-      break;
-   case 3:
-      emit_vec12( ctx, rvb, data, stride, count );
-      break;
-   case 4:
-      emit_vec16( ctx, rvb, data, stride, count );
-      break;
-   default:
-      assert(0);
-      exit(1);
-      break;
-   }
-
-}
-
-
-
-static void emit_s0_vec( GLcontext *ctx,
-			 struct radeon_dma_region *rvb,
-			 char *data,
-			 int stride,
-			 int count )
+static void emit_s0_vec(uint32_t *out, GLvoid *data, int stride, int count)
 {
    int i;
-   int *out = (int *)(rvb->address + rvb->start);
-
-   if (RADEON_DEBUG & DEBUG_VERTS)
+   if (RADEON_DEBUG & RADEON_VERTS)
       fprintf(stderr, "%s count %d stride %d\n",
 	      __FUNCTION__, count, stride);
 
@@ -384,16 +99,11 @@ static void emit_s0_vec( GLcontext *ctx,
    }
 }
 
-static void emit_stq_vec( GLcontext *ctx,
-			 struct radeon_dma_region *rvb,
-			 char *data,
-			 int stride,
-			 int count )
+static void emit_stq_vec(uint32_t *out, GLvoid *data, int stride, int count)
 {
    int i;
-   int *out = (int *)(rvb->address + rvb->start);
 
-   if (RADEON_DEBUG & DEBUG_VERTS)
+   if (RADEON_DEBUG & RADEON_VERTS)
       fprintf(stderr, "%s count %d stride %d\n",
 	      __FUNCTION__, count, stride);
 
@@ -409,21 +119,16 @@ static void emit_stq_vec( GLcontext *ctx,
 
 
 
-static void emit_tex_vector( GLcontext *ctx,
-			     struct radeon_dma_region *rvb,
-			     char *data,
-			     int size,
-			     int stride,
-			     int count )
+static void emit_tex_vector(GLcontext *ctx, struct radeon_aos *aos,
+			    GLvoid *data, int size, int stride, int count)
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    int emitsize;
+   uint32_t *out;
 
-   if (RADEON_DEBUG & DEBUG_VERTS)
+   if (RADEON_DEBUG & RADEON_VERTS)
       fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
 
-   assert (!rvb->buf);
-
    switch (size) {
    case 4: emitsize = 3; break;
    case 3: emitsize = 3; break;
@@ -432,34 +137,33 @@ static void emit_tex_vector( GLcontext *ctx,
 
 
    if (stride == 0) {
-      radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
+      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * 4, 32);
       count = 1;
-      rvb->aos_start = GET_START(rvb);
-      rvb->aos_stride = 0;
-      rvb->aos_size = emitsize;
+      aos->stride = 0;
    }
    else {
-      radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
-      rvb->aos_start = GET_START(rvb);
-      rvb->aos_stride = emitsize;
-      rvb->aos_size = emitsize;
+      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * count * 4, 32);
+      aos->stride = emitsize;
    }
 
+   aos->components = emitsize;
+   aos->count = count;
 
    /* Emit the data
     */
+   out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
    switch (size) {
    case 1:
-      emit_s0_vec( ctx, rvb, data, stride, count ); 
+      emit_s0_vec( out, data, stride, count );
       break;
    case 2:
-      emit_vec8( ctx, rvb, data, stride, count );
+      radeonEmitVec8( out, data, stride, count );
       break;
    case 3:
-      emit_vec12( ctx, rvb, data, stride, count );
+      radeonEmitVec12( out, data, stride, count );
       break;
    case 4:
-      emit_stq_vec( ctx, rvb, data, stride, count );
+      emit_stq_vec( out, data, stride, count );
       break;
    default:
       assert(0);
@@ -476,27 +180,26 @@ static void emit_tex_vector( GLcontext *ctx,
  */
 void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
    struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
-   struct radeon_dma_region **component = rmesa->tcl.aos_components;
    GLuint nr = 0;
    GLuint vfmt = 0;
    GLuint count = VB->Count;
    GLuint vtx, unit;
    
 #if 0
-   if (RADEON_DEBUG & DEBUG_VERTS) 
+   if (RADEON_DEBUG & RADEON_VERTS)
       _tnl_print_vert_flags( __FUNCTION__, inputs );
 #endif
 
    if (1) {
       if (!rmesa->tcl.obj.buf) 
-	 emit_vector( ctx, 
-		      &rmesa->tcl.obj, 
-		      (char *)VB->ObjPtr->data,
-		      VB->ObjPtr->size,
-		      VB->ObjPtr->stride,
-		      count);
+	rcommon_emit_vector( ctx, 
+			     &(rmesa->tcl.aos[nr]),
+			     (char *)VB->ObjPtr->data,
+			     VB->ObjPtr->size,
+			     VB->ObjPtr->stride,
+			     count);
 
       switch( VB->ObjPtr->size ) {
       case 4: vfmt |= RADEON_CP_VC_FRMT_W0;
@@ -505,21 +208,21 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
       default:
          break;
       }
-      component[nr++] = &rmesa->tcl.obj;
+      nr++;
    }
    
 
    if (inputs & VERT_BIT_NORMAL) {
       if (!rmesa->tcl.norm.buf)
-	 emit_vector( ctx, 
-		      &(rmesa->tcl.norm), 
-		      (char *)VB->NormalPtr->data,
-		      3,
-		      VB->NormalPtr->stride,
-		      count);
+	 rcommon_emit_vector( ctx, 
+			      &(rmesa->tcl.aos[nr]),
+			      (char *)VB->NormalPtr->data,
+			      3,
+			      VB->NormalPtr->stride,
+			      count);
 
       vfmt |= RADEON_CP_VC_FRMT_N0;
-      component[nr++] = &rmesa->tcl.norm;
+      nr++;
    }
 
    if (inputs & VERT_BIT_COLOR0) {
@@ -537,31 +240,30 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
       }
 
       if (!rmesa->tcl.rgba.buf)
-	 emit_vector( ctx,
-		      &(rmesa->tcl.rgba),
-		      (char *)VB->ColorPtr[0]->data,
-		      emitsize,
-		      VB->ColorPtr[0]->stride,
-		      count);
-
-
-      component[nr++] = &rmesa->tcl.rgba;
+	rcommon_emit_vector( ctx,
+			     &(rmesa->tcl.aos[nr]),
+			     (char *)VB->ColorPtr[0]->data,
+			     emitsize,
+			     VB->ColorPtr[0]->stride,
+			     count);
+
+      nr++;
    }
 
 
    if (inputs & VERT_BIT_COLOR1) {
       if (!rmesa->tcl.spec.buf) {
 
-	 emit_vector( ctx,
-		      &rmesa->tcl.spec,
-		      (char *)VB->SecondaryColorPtr[0]->data,
-		      3,
-		      VB->SecondaryColorPtr[0]->stride,
-		      count);
+	rcommon_emit_vector( ctx,
+			     &(rmesa->tcl.aos[nr]),
+			     (char *)VB->SecondaryColorPtr[0]->data,
+			     3,
+			     VB->SecondaryColorPtr[0]->stride,
+			     count);
       }
 
       vfmt |= RADEON_CP_VC_FRMT_FPSPEC;
-      component[nr++] = &rmesa->tcl.spec;
+      nr++;
    }
 
 /* FIXME: not sure if this is correct. May need to stitch this together with
@@ -570,13 +272,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
    if (inputs & VERT_BIT_FOG) {
       if (!rmesa->tcl.fog.buf)
 	 emit_vecfog( ctx,
-		      &(rmesa->tcl.fog),
+		      &(rmesa->tcl.aos[nr]),
 		      (char *)VB->FogCoordPtr->data,
 		      VB->FogCoordPtr->stride,
 		      count);
 
       vfmt |= RADEON_CP_VC_FRMT_FPFOG;
-      component[nr++] = &rmesa->tcl.fog;
+      nr++;
    }
 
 
@@ -587,11 +289,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
       if (inputs & VERT_BIT_TEX(unit)) {
 	 if (!rmesa->tcl.tex[unit].buf)
 	    emit_tex_vector( ctx,
-			     &(rmesa->tcl.tex[unit]),
+			     &(rmesa->tcl.aos[nr]),
 			     (char *)VB->TexCoordPtr[unit]->data,
 			     VB->TexCoordPtr[unit]->size,
 			     VB->TexCoordPtr[unit]->stride,
 			     count );
+	 nr++;
 
 	 vfmt |= RADEON_ST_BIT(unit);
          /* assume we need the 3rd coord if texgen is active for r/q OR at least
@@ -609,7 +312,6 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
 		 (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1)))
 	       radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ;
 	 }
-	 component[nr++] = &rmesa->tcl.tex[unit];
       }
    }
 
@@ -622,34 +324,3 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
    rmesa->tcl.vertex_format = vfmt;
 }
 
-
-void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-   GLuint unit;
-
-#if 0
-   if (RADEON_DEBUG & DEBUG_VERTS) 
-      _tnl_print_vert_flags( __FUNCTION__, newinputs );
-#endif
-
-   if (newinputs & VERT_BIT_POS) 
-     radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
-
-   if (newinputs & VERT_BIT_NORMAL) 
-      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
-
-   if (newinputs & VERT_BIT_COLOR0) 
-      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
-
-   if (newinputs & VERT_BIT_COLOR1) 
-      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
-      
-   if (newinputs & VERT_BIT_FOG)
-      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ );
-
-   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
-      if (newinputs & VERT_BIT_TEX(unit))
-         radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ );
-   }
-}
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h b/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h
index 034cda8a65b..515783135d6 100644
--- a/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h
@@ -54,8 +54,7 @@ static void TAG(emit)( GLcontext *ctx,
 
    union emit_union *v = (union emit_union *)dest;
 
-   if (RADEON_DEBUG & DEBUG_VERTS)
-      fprintf(stderr, "%s\n", __FUNCTION__); 
+   radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __FUNCTION__);
 
    coord = (GLuint (*)[4])VB->ObjPtr->data;
    coord_stride = VB->ObjPtr->stride;
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
index 126d0727c63..78ec1193026 100644
--- a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
@@ -310,7 +310,7 @@ static void init_tcl_verts( void )
 
 void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
    GLuint req = 0;
    GLuint unit;
@@ -374,14 +374,15 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
 	 break;
 
    if (rmesa->tcl.vertex_format == setup_tab[i].vertex_format &&
-       rmesa->tcl.indexed_verts.buf)
+       rmesa->radeon.tcl.aos[0].bo)
       return;
 
-   if (rmesa->tcl.indexed_verts.buf)
+   if (rmesa->radeon.tcl.aos[0].bo)
       radeonReleaseArrays( ctx, ~0 );
 
-   radeonAllocDmaRegion( rmesa,
-			 &rmesa->tcl.indexed_verts, 
+   radeonAllocDmaRegion( &rmesa->radeon,
+			 &rmesa->radeon.tcl.aos[0].bo,
+			 &rmesa->radeon.tcl.aos[0].offset,
 			 VB->Count * setup_tab[i].vertex_size * 4, 
 			 4);
 
@@ -421,29 +422,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
 
 
    setup_tab[i].emit( ctx, 0, VB->Count, 
-		      rmesa->tcl.indexed_verts.address + 
-		      rmesa->tcl.indexed_verts.start );
+		      rmesa->radeon.tcl.aos[0].bo->ptr + rmesa->radeon.tcl.aos[0].offset);
 
+   //   rmesa->radeon.tcl.aos[0].size = setup_tab[i].vertex_size;
+   rmesa->radeon.tcl.aos[0].stride = setup_tab[i].vertex_size;
    rmesa->tcl.vertex_format = setup_tab[i].vertex_format;
-   rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts );
-   rmesa->tcl.indexed_verts.aos_size = setup_tab[i].vertex_size;
-   rmesa->tcl.indexed_verts.aos_stride = setup_tab[i].vertex_size;
-
-   rmesa->tcl.aos_components[0] = &rmesa->tcl.indexed_verts;
-   rmesa->tcl.nr_aos_components = 1;
+   rmesa->radeon.tcl.aos_count = 1;
 }
 
 
-
-void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-
-#if 0
-   if (RADEON_DEBUG & DEBUG_VERTS) 
-      _tnl_print_vert_flags( __FUNCTION__, newinputs );
-#endif
-
-   if (newinputs) 
-     radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ );
-}
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
new file mode 100644
index 00000000000..38db305e2a7
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_mipmap_tree.h"
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "main/simple_list.h"
+#include "main/texcompress.h"
+#include "main/texformat.h"
+
+static GLuint radeon_compressed_texture_size(GLcontext *ctx,
+		GLsizei width, GLsizei height, GLsizei depth,
+		GLuint mesaFormat)
+{
+	GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat);
+
+	if (mesaFormat == MESA_FORMAT_RGB_DXT1 ||
+	    mesaFormat == MESA_FORMAT_RGBA_DXT1) {
+		if (width + 3 < 8)	/* width one block */
+			size = size * 4;
+		else if (width + 3 < 16)
+			size = size * 2;
+	} else {
+		/* DXT3/5, 16 bytes per block */
+	  //		WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n");
+		if (width + 3 < 8)
+			size = size * 2;
+	}
+
+	return size;
+}
+
+
+static int radeon_compressed_num_bytes(GLuint mesaFormat)
+{
+   int bytes = 0;
+   switch(mesaFormat) {
+     
+   case MESA_FORMAT_RGB_FXT1:
+   case MESA_FORMAT_RGBA_FXT1:
+   case MESA_FORMAT_RGB_DXT1:
+   case MESA_FORMAT_RGBA_DXT1:
+     bytes = 2;
+     break;
+     
+   case MESA_FORMAT_RGBA_DXT3:
+   case MESA_FORMAT_RGBA_DXT5:
+     bytes = 4;
+   default:
+     break;
+   }
+   
+   return bytes;
+}
+
+/**
+ * Compute sizes and fill in offset and blit information for the given
+ * image (determined by \p face and \p level).
+ *
+ * \param curOffset points to the offset at which the image is to be stored
+ * and is updated by this function according to the size of the image.
+ */
+static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree *mt,
+	GLuint face, GLuint level, GLuint* curOffset)
+{
+	radeon_mipmap_level *lvl = &mt->levels[level];
+	uint32_t row_align;
+
+	/* Find image size in bytes */
+	if (mt->compressed) {
+		/* TODO: Is this correct? Need test cases for compressed textures! */
+		row_align = rmesa->texture_compressed_row_align - 1;
+		lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align;
+		lvl->size = radeon_compressed_texture_size(mt->radeon->glCtx,
+							   lvl->width, lvl->height, lvl->depth, mt->compressed);
+	} else if (mt->target == GL_TEXTURE_RECTANGLE_NV) {
+		row_align = rmesa->texture_rect_row_align - 1;
+		lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align;
+		lvl->size = lvl->rowstride * lvl->height;
+	} else if (mt->tilebits & RADEON_TXO_MICRO_TILE) {
+		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+		 * though the actual offset may be different (if texture is less than
+		 * 32 bytes width) to the untiled case */
+		lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31;
+		lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth;
+	} else {
+		row_align = rmesa->texture_row_align - 1;
+		lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align;
+		lvl->size = lvl->rowstride * lvl->height * lvl->depth;
+	}
+	assert(lvl->size > 0);
+
+	/* All images are aligned to a 32-byte offset */
+	*curOffset = (*curOffset + 0x1f) & ~0x1f;
+	lvl->faces[face].offset = *curOffset;
+	*curOffset += lvl->size;
+
+	if (RADEON_DEBUG & RADEON_TEXTURE)
+	  fprintf(stderr,
+		  "level %d, face %d: rs:%d %dx%d at %d\n",
+		  level, face, lvl->rowstride, lvl->width, lvl->height, lvl->faces[face].offset);
+}
+
+static GLuint minify(GLuint size, GLuint levels)
+{
+	size = size >> levels;
+	if (size < 1)
+		size = 1;
+	return size;
+}
+
+
+static void calculate_miptree_layout_r100(radeonContextPtr rmesa, radeon_mipmap_tree *mt)
+{
+	GLuint curOffset;
+	GLuint numLevels;
+	GLuint i;
+	GLuint face;
+
+	numLevels = mt->lastLevel - mt->firstLevel + 1;
+	assert(numLevels <= rmesa->glCtx->Const.MaxTextureLevels);
+
+	curOffset = 0;
+	for(face = 0; face < mt->faces; face++) {
+
+		for(i = 0; i < numLevels; i++) {
+			mt->levels[i].width = minify(mt->width0, i);
+			mt->levels[i].height = minify(mt->height0, i);
+			mt->levels[i].depth = minify(mt->depth0, i);
+			compute_tex_image_offset(rmesa, mt, face, i, &curOffset);
+		}
+	}
+
+	/* Note the required size in memory */
+	mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+}
+
+static void calculate_miptree_layout_r300(radeonContextPtr rmesa, radeon_mipmap_tree *mt)
+{
+	GLuint curOffset;
+	GLuint numLevels;
+	GLuint i;
+
+	numLevels = mt->lastLevel - mt->firstLevel + 1;
+	assert(numLevels <= rmesa->glCtx->Const.MaxTextureLevels);
+
+	curOffset = 0;
+	for(i = 0; i < numLevels; i++) {
+		GLuint face;
+
+		mt->levels[i].width = minify(mt->width0, i);
+		mt->levels[i].height = minify(mt->height0, i);
+		mt->levels[i].depth = minify(mt->depth0, i);
+
+		for(face = 0; face < mt->faces; face++)
+			compute_tex_image_offset(rmesa, mt, face, i, &curOffset);
+	}
+
+	/* Note the required size in memory */
+	mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+}
+
+/**
+ * Create a new mipmap tree, calculate its layout and allocate memory.
+ */
+radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t,
+		GLenum target, GLenum internal_format, GLuint firstLevel, GLuint lastLevel,
+		GLuint width0, GLuint height0, GLuint depth0,
+		GLuint bpp, GLuint tilebits, GLuint compressed)
+{
+	radeon_mipmap_tree *mt = CALLOC_STRUCT(_radeon_mipmap_tree);
+
+	mt->radeon = rmesa;
+	mt->internal_format = internal_format;
+	mt->refcount = 1;
+	mt->t = t;
+	mt->target = target;
+	mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+	mt->firstLevel = firstLevel;
+	mt->lastLevel = lastLevel;
+	mt->width0 = width0;
+	mt->height0 = height0;
+	mt->depth0 = depth0;
+	mt->bpp = compressed ? radeon_compressed_num_bytes(compressed) : bpp;
+	mt->tilebits = tilebits;
+	mt->compressed = compressed;
+
+	if (rmesa->radeonScreen->chip_family >= CHIP_FAMILY_R300)
+		calculate_miptree_layout_r300(rmesa, mt);
+	else
+		calculate_miptree_layout_r100(rmesa, mt);
+
+	mt->bo = radeon_bo_open(rmesa->radeonScreen->bom,
+                            0, mt->totalsize, 1024,
+                            RADEON_GEM_DOMAIN_VRAM,
+                            0);
+
+	return mt;
+}
+
+void radeon_miptree_reference(radeon_mipmap_tree *mt)
+{
+	mt->refcount++;
+	assert(mt->refcount > 0);
+}
+
+void radeon_miptree_unreference(radeon_mipmap_tree *mt)
+{
+	if (!mt)
+		return;
+
+	assert(mt->refcount > 0);
+	mt->refcount--;
+	if (!mt->refcount) {
+		radeon_bo_unref(mt->bo);
+		free(mt);
+	}
+}
+
+
+/**
+ * Calculate first and last mip levels for the given texture object,
+ * where the dimensions are taken from the given texture image at
+ * the given level.
+ *
+ * Note: level is the OpenGL level number, which is not necessarily the same
+ * as the first level that is actually present.
+ *
+ * The base level image of the given texture face must be non-null,
+ * or this will fail.
+ */
+static void calculate_first_last_level(struct gl_texture_object *tObj,
+				       GLuint *pfirstLevel, GLuint *plastLevel,
+				       GLuint face, GLuint level)
+{
+	const struct gl_texture_image * const baseImage =
+		tObj->Image[face][level];
+
+	assert(baseImage);
+	
+	/* These must be signed values.  MinLod and MaxLod can be negative numbers,
+	* and having firstLevel and lastLevel as signed prevents the need for
+	* extra sign checks.
+	*/
+	int   firstLevel;
+	int   lastLevel;
+
+	/* Yes, this looks overly complicated, but it's all needed.
+	*/
+	switch (tObj->Target) {
+	case GL_TEXTURE_1D:
+	case GL_TEXTURE_2D:
+	case GL_TEXTURE_3D:
+	case GL_TEXTURE_CUBE_MAP:
+		if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
+			/* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
+			*/
+			firstLevel = lastLevel = tObj->BaseLevel;
+		} else {
+			firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5);
+			firstLevel = MAX2(firstLevel, tObj->BaseLevel);
+			firstLevel = MIN2(firstLevel, level + baseImage->MaxLog2);
+			lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5);
+			lastLevel = MAX2(lastLevel, tObj->BaseLevel);
+			lastLevel = MIN2(lastLevel, level + baseImage->MaxLog2);
+			lastLevel = MIN2(lastLevel, tObj->MaxLevel);
+			lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
+		}
+		break;
+	case GL_TEXTURE_RECTANGLE_NV:
+	case GL_TEXTURE_4D_SGIS:
+		firstLevel = lastLevel = 0;
+		break;
+	default:
+		return;
+	}
+
+	/* save these values */
+	*pfirstLevel = firstLevel;
+	*plastLevel = lastLevel;
+}
+
+
+/**
+ * Checks whether the given miptree can hold the given texture image at the
+ * given face and level.
+ */
+GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
+		struct gl_texture_image *texImage, GLuint face, GLuint level)
+{
+	radeon_mipmap_level *lvl;
+
+	if (face >= mt->faces || level < mt->firstLevel || level > mt->lastLevel)
+		return GL_FALSE;
+
+	if (texImage->InternalFormat != mt->internal_format ||
+	    texImage->IsCompressed != mt->compressed)
+		return GL_FALSE;
+
+	if (!texImage->IsCompressed &&
+	    !mt->compressed &&
+	    texImage->TexFormat->TexelBytes != mt->bpp)
+		return GL_FALSE;
+
+	lvl = &mt->levels[level - mt->firstLevel];
+	if (lvl->width != texImage->Width ||
+	    lvl->height != texImage->Height ||
+	    lvl->depth != texImage->Depth)
+		return GL_FALSE;
+
+	return GL_TRUE;
+}
+
+
+/**
+ * Checks whether the given miptree has the right format to store the given texture object.
+ */
+GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj)
+{
+	struct gl_texture_image *firstImage;
+	GLuint compressed;
+	GLuint numfaces = 1;
+	GLuint firstLevel, lastLevel;
+
+	calculate_first_last_level(texObj, &firstLevel, &lastLevel, 0, texObj->BaseLevel);
+	if (texObj->Target == GL_TEXTURE_CUBE_MAP)
+		numfaces = 6;
+
+	firstImage = texObj->Image[0][firstLevel];
+	compressed = firstImage->IsCompressed ? firstImage->TexFormat->MesaFormat : 0;
+
+	return (mt->firstLevel == firstLevel &&
+	        mt->lastLevel == lastLevel &&
+	        mt->width0 == firstImage->Width &&
+	        mt->height0 == firstImage->Height &&
+	        mt->depth0 == firstImage->Depth &&
+	        mt->compressed == compressed &&
+	        (!mt->compressed ? (mt->bpp == firstImage->TexFormat->TexelBytes) : 1));
+}
+
+
+/**
+ * Try to allocate a mipmap tree for the given texture that will fit the
+ * given image in the given position.
+ */
+void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t,
+		radeon_texture_image *image, GLuint face, GLuint level)
+{
+	GLuint compressed = image->base.IsCompressed ? image->base.TexFormat->MesaFormat : 0;
+	GLuint numfaces = 1;
+	GLuint firstLevel, lastLevel;
+
+	assert(!t->mt);
+
+	calculate_first_last_level(&t->base, &firstLevel, &lastLevel, face, level);
+	if (t->base.Target == GL_TEXTURE_CUBE_MAP)
+		numfaces = 6;
+
+	if (level != firstLevel || face >= numfaces)
+		return;
+
+	t->mt = radeon_miptree_create(rmesa, t, t->base.Target,
+		image->base.InternalFormat,
+		firstLevel, lastLevel,
+		image->base.Width, image->base.Height, image->base.Depth,
+		image->base.TexFormat->TexelBytes, t->tile_bits, compressed);
+}
+
+/* Although we use the image_offset[] array to store relative offsets
+ * to cube faces, Mesa doesn't know anything about this and expects
+ * each cube face to be treated as a separate image.
+ *
+ * These functions present that view to mesa:
+ */
+void
+radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets)
+{
+     if (mt->target != GL_TEXTURE_3D || mt->faces == 1)
+        offsets[0] = 0;
+     else {
+	int i;
+	for (i = 0; i < 6; i++)
+		offsets[i] = mt->levels[level].faces[i].offset;
+     }
+}
+
+GLuint
+radeon_miptree_image_offset(radeon_mipmap_tree *mt,
+			    GLuint face, GLuint level)
+{
+   if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
+      return (mt->levels[level].faces[face].offset);
+   else
+      return mt->levels[level].faces[0].offset;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
new file mode 100644
index 00000000000..db28252da37
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_MIPMAP_TREE_H_
+#define __RADEON_MIPMAP_TREE_H_
+
+#include "radeon_common.h"
+
+typedef struct _radeon_mipmap_tree radeon_mipmap_tree;
+typedef struct _radeon_mipmap_level radeon_mipmap_level;
+typedef struct _radeon_mipmap_image radeon_mipmap_image;
+
+struct _radeon_mipmap_image {
+	GLuint offset; /** Offset of this image from the start of mipmap tree buffer, in bytes */
+};
+
+struct _radeon_mipmap_level {
+	GLuint width;
+	GLuint height;
+	GLuint depth;
+	GLuint size; /** Size of each image, in bytes */
+	GLuint rowstride; /** in bytes */
+	radeon_mipmap_image faces[6];
+};
+
+/* store the max possible in the miptree */
+#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 13
+
+/**
+ * A mipmap tree contains texture images in the layout that the hardware
+ * expects.
+ *
+ * The meta-data of mipmap trees is immutable, i.e. you cannot change the
+ * layout on-the-fly; however, the texture contents (i.e. texels) can be
+ * changed.
+ */
+struct _radeon_mipmap_tree {
+	radeonContextPtr radeon;
+	radeonTexObj *t;
+	struct radeon_bo *bo;
+	GLuint refcount;
+
+	GLuint totalsize; /** total size of the miptree, in bytes */
+
+	GLenum target; /** GL_TEXTURE_xxx */
+	GLenum internal_format;
+	GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */
+	GLuint firstLevel; /** First mip level stored in this mipmap tree */
+	GLuint lastLevel; /** Last mip level stored in this mipmap tree */
+
+	GLuint width0; /** Width of firstLevel image */
+	GLuint height0; /** Height of firstLevel image */
+	GLuint depth0; /** Depth of firstLevel image */
+
+	GLuint bpp; /** Bytes per texel */
+	GLuint tilebits; /** RADEON_TXO_xxx_TILE */
+	GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */
+
+	radeon_mipmap_level levels[RADEON_MIPTREE_MAX_TEXTURE_LEVELS];
+};
+
+radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t,
+		GLenum target, GLenum internal_format, GLuint firstLevel, GLuint lastLevel,
+		GLuint width0, GLuint height0, GLuint depth0,
+		GLuint bpp, GLuint tilebits, GLuint compressed);
+void radeon_miptree_reference(radeon_mipmap_tree *mt);
+void radeon_miptree_unreference(radeon_mipmap_tree *mt);
+
+GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
+		struct gl_texture_image *texImage, GLuint face, GLuint level);
+GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj);
+void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t,
+			      radeon_texture_image *texImage, GLuint face, GLuint level);
+GLuint radeon_miptree_image_offset(radeon_mipmap_tree *mt,
+				   GLuint face, GLuint level);
+void radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets);
+#endif /* __RADEON_MIPMAP_TREE_H_ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_queryobj.c b/src/mesa/drivers/dri/radeon/radeon_queryobj.c
new file mode 100644
index 00000000000..b79d864ba29
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_queryobj.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright © 2008-2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Maciej Cencora <m.cencora@gmail.com>
+ *
+ */
+#include "radeon_common.h"
+#include "radeon_queryobj.h"
+#include "radeon_debug.h"
+
+#include "main/imports.h"
+#include "main/simple_list.h"
+
+static int radeonQueryIsFlushed(GLcontext *ctx, struct gl_query_object *q)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	struct radeon_query_object *tmp, *query = (struct radeon_query_object *)q;
+
+	foreach(tmp, &radeon->query.not_flushed_head) {
+		if (tmp == query) {
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+static void radeonQueryGetResult(GLcontext *ctx, struct gl_query_object *q)
+{
+	struct radeon_query_object *query = (struct radeon_query_object *)q;
+	uint32_t *result;
+	int i;
+
+	radeon_print(RADEON_STATE, RADEON_VERBOSE,
+			"%s: query id %d, result %d\n",
+			__FUNCTION__, query->Base.Id, (int) query->Base.Result);
+
+	radeon_bo_map(query->bo, GL_FALSE);
+
+	result = query->bo->ptr;
+
+	query->Base.Result = 0;
+	for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) {
+		query->Base.Result += result[i];
+		radeon_print(RADEON_STATE, RADEON_TRACE, "result[%d] = %d\n", i, result[i]);
+	}
+
+	radeon_bo_unmap(query->bo);
+}
+
+static struct gl_query_object * radeonNewQueryObject(GLcontext *ctx, GLuint id)
+{
+	struct radeon_query_object *query;
+
+	query = _mesa_calloc(sizeof(struct radeon_query_object));
+
+	query->Base.Id = id;
+	query->Base.Result = 0;
+	query->Base.Active = GL_FALSE;
+	query->Base.Ready = GL_TRUE;
+
+	radeon_print(RADEON_STATE, RADEON_VERBOSE,"%s: query id %d\n", __FUNCTION__, query->Base.Id);
+
+	return &query->Base;
+}
+
+static void radeonDeleteQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+	struct radeon_query_object *query = (struct radeon_query_object *)q;
+
+	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+	if (query->bo) {
+		radeon_bo_unref(query->bo);
+	}
+
+	_mesa_free(query);
+}
+
+static void radeonWaitQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+	struct radeon_query_object *query = (struct radeon_query_object *)q;
+
+	/* If the cmdbuf with packets for this query hasn't been flushed yet, do it now */
+	if (!radeonQueryIsFlushed(ctx, q))
+		ctx->Driver.Flush(ctx);
+
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, q->Id, query->bo, query->curr_offset);
+
+	radeonQueryGetResult(ctx, q);
+
+	query->Base.Ready = GL_TRUE;
+}
+
+
+static void radeonBeginQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	struct radeon_query_object *query = (struct radeon_query_object *)q;
+
+	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+	assert(radeon->query.current == NULL);
+
+	if (radeon->dma.flush)
+		radeon->dma.flush(radeon->glCtx);
+
+	if (!query->bo) {
+		query->bo = radeon_bo_open(radeon->radeonScreen->bom, 0, RADEON_QUERY_PAGE_SIZE, RADEON_QUERY_PAGE_SIZE, RADEON_GEM_DOMAIN_GTT, 0);
+	}
+	query->curr_offset = 0;
+
+	radeon->query.current = query;
+
+	radeon->query.queryobj.dirty = GL_TRUE;
+	radeon->hw.is_dirty = GL_TRUE;
+	insert_at_tail(&radeon->query.not_flushed_head, query);
+
+}
+
+void radeonEmitQueryEnd(GLcontext *ctx)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	struct radeon_query_object *query = radeon->query.current;
+
+	if (!query)
+		return;
+
+	if (query->emitted_begin == GL_FALSE)
+		return;
+
+	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, query->Base.Id, query->bo, query->curr_offset);
+
+	radeon_cs_space_check_with_bo(radeon->cmdbuf.cs,
+				      query->bo,
+				      0, RADEON_GEM_DOMAIN_GTT);
+
+	radeon->vtbl.emit_query_finish(radeon);
+}
+
+static void radeonEndQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+
+	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+	if (radeon->dma.flush)
+		radeon->dma.flush(radeon->glCtx);
+	radeonEmitQueryEnd(ctx);
+
+	radeon->query.current = NULL;
+}
+
+static void radeonCheckQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+	radeon_print(RADEON_STATE, RADEON_TRACE, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+#ifdef DRM_RADEON_GEM_BUSY
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+
+	if (radeon->radeonScreen->kernel_mm) {
+		struct radeon_query_object *query = (struct radeon_query_object *)q;
+		uint32_t domain;
+
+		/* Need to perform a flush, as per ARB_occlusion_query spec */
+		if (!radeonQueryIsFlushed(ctx, q)) {
+			ctx->Driver.Flush(ctx);
+		}
+
+		if (radeon_bo_is_busy(query->bo, &domain) == 0) {
+			radeonQueryGetResult(ctx, q);
+			query->Base.Ready = GL_TRUE;
+		}
+	} else {
+		radeonWaitQuery(ctx, q);
+	}
+#else
+	radeonWaitQuery(ctx, q);
+#endif
+}
+
+void radeonInitQueryObjFunctions(struct dd_function_table *functions)
+{
+	functions->NewQueryObject = radeonNewQueryObject;
+	functions->DeleteQuery = radeonDeleteQuery;
+	functions->BeginQuery = radeonBeginQuery;
+	functions->EndQuery = radeonEndQuery;
+	functions->CheckQuery = radeonCheckQuery;
+	functions->WaitQuery = radeonWaitQuery;
+}
+
+int radeon_check_query_active(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	struct radeon_query_object *query = radeon->query.current;
+
+	if (!query || query->emitted_begin)
+		return 0;
+	return atom->cmd_size;
+}
+
+void radeon_emit_queryobj(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	BATCH_LOCALS(radeon);
+	int dwords;
+
+	dwords = (*atom->check) (ctx, atom);
+
+	BEGIN_BATCH_NO_AUTOSTATE(dwords);
+	OUT_BATCH_TABLE(atom->cmd, dwords);
+	END_BATCH();
+
+	radeon->query.current->emitted_begin = GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_queryobj.h b/src/mesa/drivers/dri/radeon/radeon_queryobj.h
new file mode 100644
index 00000000000..19374dc76b7
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_queryobj.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright © 2008 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Maciej Cencora <m.cencora@gmail.com>
+ *
+ */
+
+#include "main/imports.h"
+#include "main/simple_list.h"
+#include "radeon_common_context.h"
+
+extern void radeonEmitQueryBegin(GLcontext *ctx);
+extern void radeonEmitQueryEnd(GLcontext *ctx);
+
+extern void radeonInitQueryObjFunctions(struct dd_function_table *functions);
+
+#define RADEON_QUERY_PAGE_SIZE 4096
+
+int radeon_check_query_active(GLcontext *ctx, struct radeon_state_atom *atom);
+void radeon_emit_queryobj(GLcontext *ctx, struct radeon_state_atom *atom);
+
+static inline void radeon_init_query_stateobj(radeonContextPtr radeon, int SZ)
+{
+	radeon->query.queryobj.cmd_size = (SZ);
+	radeon->query.queryobj.cmd = (uint32_t*)CALLOC((SZ) * sizeof(uint32_t));
+	radeon->query.queryobj.name = "queryobj";
+	radeon->query.queryobj.idx = 0;
+	radeon->query.queryobj.check = radeon_check_query_active;
+	radeon->query.queryobj.dirty = GL_FALSE;
+	radeon->query.queryobj.emit = radeon_emit_queryobj;
+
+	radeon->hw.max_state_size += (SZ);
+	insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj);
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.c b/src/mesa/drivers/dri/radeon/radeon_sanity.c
index 6613757fcea..1ab570f5071 100644
--- a/src/mesa/drivers/dri/radeon/radeon_sanity.c
+++ b/src/mesa/drivers/dri/radeon/radeon_sanity.c
@@ -44,11 +44,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define MORE_VERBOSE 1
 
 #if MORE_VERBOSE
-#define VERBOSE (RADEON_DEBUG & DEBUG_VERBOSE)
+#define VERBOSE (RADEON_DEBUG & RADEON_VERBOSE)
 #define NORMAL  (1)
 #else
 #define VERBOSE 0
-#define NORMAL  (RADEON_DEBUG & DEBUG_VERBOSE)
+#define NORMAL  (RADEON_DEBUG & RADEON_VERBOSE)
 #endif
 
 
@@ -973,7 +973,7 @@ static int radeon_emit_packet3_cliprect( drm_radeon_cmd_buffer_t *cmdbuf )
 }
 
 
-int radeonSanityCmdBuffer( radeonContextPtr rmesa,
+int radeonSanityCmdBuffer( r100ContextPtr rmesa,
 			   int nbox,
 			   drm_clip_rect_t *boxes )
 {
diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.h b/src/mesa/drivers/dri/radeon/radeon_sanity.h
index 1ec06bc586b..f30eb1c4f15 100644
--- a/src/mesa/drivers/dri/radeon/radeon_sanity.h
+++ b/src/mesa/drivers/dri/radeon/radeon_sanity.h
@@ -1,7 +1,7 @@
 #ifndef RADEON_SANITY_H
 #define RADEON_SANITY_H
 
-extern int radeonSanityCmdBuffer( radeonContextPtr rmesa,
+extern int radeonSanityCmdBuffer( r100ContextPtr rmesa,
 				  int nbox,
 				  drm_clip_rect_t *boxes );
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index a977bedbc28..5ffb55db5ef 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -35,6 +35,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  * \author  Gareth Hughes <gareth@valinux.com>
  */
 
+#include <errno.h>
 #include "main/glheader.h"
 #include "main/imports.h"
 #include "main/mtypes.h"
@@ -45,32 +46,42 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_chipset.h"
 #include "radeon_macros.h"
 #include "radeon_screen.h"
+#include "radeon_common.h"
+#include "radeon_span.h"
 #if !RADEON_COMMON
 #include "radeon_context.h"
-#include "radeon_span.h"
 #include "radeon_tex.h"
 #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
 #include "r200_context.h"
 #include "r200_ioctl.h"
-#include "r200_span.h"
 #include "r200_tex.h"
 #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
 #include "r300_context.h"
-#include "r300_fragprog.h"
 #include "r300_tex.h"
-#include "radeon_span.h"
+#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+#include "r600_context.h"
+#include "r700_driconf.h" /* +r6/r7 */
+#include "r600_tex.h"     /* +r6/r7 */
 #endif
 
 #include "utils.h"
 #include "vblank.h"
 #include "drirenderbuffer.h"
 
+#include "radeon_bocs_wrapper.h"
+
 #include "GL/internal/dri_interface.h"
 
 /* Radeon configuration
  */
 #include "xmlpool.h"
 
+#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,"Size of command buffer (in KB)") \
+        DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
+DRI_CONF_OPT_END
+
 #if !RADEON_COMMON	/* R100 */
 PUBLIC const char __driConfigOptions[] =
 DRI_CONF_BEGIN
@@ -80,6 +91,7 @@ DRI_CONF_BEGIN
         DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
         DRI_CONF_MAX_TEXTURE_UNITS(3,2,3)
         DRI_CONF_HYPERZ(false)
+        DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
     DRI_CONF_SECTION_END
     DRI_CONF_SECTION_QUALITY
         DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
@@ -95,7 +107,7 @@ DRI_CONF_BEGIN
         DRI_CONF_NO_RAST(false)
     DRI_CONF_SECTION_END
 DRI_CONF_END;
-static const GLuint __driNConfigOptions = 14;
+static const GLuint __driNConfigOptions = 15;
 
 #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
 
@@ -107,6 +119,7 @@ DRI_CONF_BEGIN
         DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
         DRI_CONF_MAX_TEXTURE_UNITS(6,2,6)
         DRI_CONF_HYPERZ(false)
+        DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
     DRI_CONF_SECTION_END
     DRI_CONF_SECTION_QUALITY
         DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
@@ -126,7 +139,7 @@ DRI_CONF_BEGIN
         DRI_CONF_NV_VERTEX_PROGRAM(false)
     DRI_CONF_SECTION_END
 DRI_CONF_END;
-static const GLuint __driNConfigOptions = 16;
+static const GLuint __driNConfigOptions = 17;
 
 extern const struct dri_extension blend_extensions[];
 extern const struct dri_extension ARB_vp_extension[];
@@ -134,7 +147,10 @@ extern const struct dri_extension NV_vp_extension[];
 extern const struct dri_extension ATI_fs_extension[];
 extern const struct dri_extension point_extensions[];
 
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+#elif RADEON_COMMON && (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
+
+#define DRI_CONF_FP_OPTIMIZATION_SPEED   0
+#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
 
 /* TODO: integrate these into xmlpool.h! */
 #define DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(def,min,max) \
@@ -149,11 +165,7 @@ DRI_CONF_OPT_BEGIN_V(texture_coord_units,int,def, # min ":" # max ) \
         DRI_CONF_DESC(de,"Anzahl der Texturkoordinateneinheiten") \
 DRI_CONF_OPT_END
 
-#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \
-DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
-        DRI_CONF_DESC(en,"Size of command buffer (in KB)") \
-        DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
-DRI_CONF_OPT_END
+
 
 #define DRI_CONF_DISABLE_S3TC(def) \
 DRI_CONF_OPT_BEGIN(disable_s3tc,bool,def) \
@@ -208,47 +220,42 @@ static const GLuint __driNConfigOptions = 17;
 
 extern const struct dri_extension gl_20_extension[];
 
-#ifndef RADEON_DEBUG
-int RADEON_DEBUG = 0;
-
-static const struct dri_debug_control debug_control[] = {
-	{"fall", DEBUG_FALLBACKS},
-	{"tex", DEBUG_TEXTURE},
-	{"ioctl", DEBUG_IOCTL},
-	{"prim", DEBUG_PRIMS},
-	{"vert", DEBUG_VERTS},
-	{"state", DEBUG_STATE},
-	{"code", DEBUG_CODEGEN},
-	{"vfmt", DEBUG_VFMT},
-	{"vtxf", DEBUG_VFMT},
-	{"verb", DEBUG_VERBOSE},
-	{"dri", DEBUG_DRI},
-	{"dma", DEBUG_DMA},
-	{"san", DEBUG_SANITY},
-	{"sync", DEBUG_SYNC},
-	{"pix", DEBUG_PIXEL},
-	{"mem", DEBUG_MEMORY},
-	{"allmsg", ~DEBUG_SYNC}, /* avoid the term "sync" because the parser uses strstr */
-	{NULL, 0}
-};
-#endif /* RADEON_DEBUG */
-
 #endif /* RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) */
 
 extern const struct dri_extension card_extensions[];
+extern const struct dri_extension mm_extensions[];
 
 static int getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo );
 
 static int
-radeonGetParam(int fd, int param, void *value)
+radeonGetParam(__DRIscreenPrivate *sPriv, int param, void *value)
 {
   int ret;
-  drm_radeon_getparam_t gp;
-
-  gp.param = param;
-  gp.value = value;
+  drm_radeon_getparam_t gp = { 0 };
+  struct drm_radeon_info info = { 0 };
+
+  if (sPriv->drm_version.major >= 2) {
+      info.value = (uint64_t)(uintptr_t)value;
+      switch (param) {
+      case RADEON_PARAM_DEVICE_ID:
+          info.request = RADEON_INFO_DEVICE_ID;
+          break;
+      case RADEON_PARAM_NUM_GB_PIPES:
+          info.request = RADEON_INFO_NUM_GB_PIPES;
+          break;
+      case RADEON_PARAM_NUM_Z_PIPES:
+          info.request = RADEON_INFO_NUM_Z_PIPES;
+          break;
+      default:
+          return -EINVAL;
+      }
+      ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_INFO, &info, sizeof(info));
+  } else {
+      gp.param = param;
+      gp.value = value;
 
-  ret = drmCommandWriteRead( fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
+      ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
+  }
   return ret;
 }
 
@@ -335,6 +342,12 @@ static const __DRItexOffsetExtension radeonTexOffsetExtension = {
     { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
     radeonSetTexOffset,
 };
+
+static const __DRItexBufferExtension radeonTexBufferExtension = {
+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+   radeonSetTexBuffer,
+   radeonSetTexBuffer2,
+};
 #endif
 
 #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
@@ -349,6 +362,12 @@ static const __DRItexOffsetExtension r200texOffsetExtension = {
     { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
    r200SetTexOffset,
 };
+
+static const __DRItexBufferExtension r200TexBufferExtension = {
+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+   r200SetTexBuffer,
+   r200SetTexBuffer2,
+};
 #endif
 
 #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
@@ -356,137 +375,32 @@ static const __DRItexOffsetExtension r300texOffsetExtension = {
     { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
    r300SetTexOffset,
 };
-#endif
 
-/* Create the device specific screen private data struct.
- */
-static radeonScreenPtr
-radeonCreateScreen( __DRIscreenPrivate *sPriv )
-{
-   radeonScreenPtr screen;
-   RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
-   unsigned char *RADEONMMIO;
-   int i;
-   int ret;
-   uint32_t temp = 0;
-
-   if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) {
-      fprintf(stderr,"\nERROR!  sizeof(RADEONDRIRec) does not match passed size from device driver\n");
-      return GL_FALSE;
-   }
-
-   /* Allocate the private area */
-   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
-   if ( !screen ) {
-      __driUtilMessage("%s: Could not allocate memory for screen structure",
-		       __FUNCTION__);
-      return NULL;
-   }
-
-#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-	RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
+static const __DRItexBufferExtension r300TexBufferExtension = {
+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+   r300SetTexBuffer,
+   r300SetTexBuffer2,
+};
 #endif
 
-   /* parse information in __driConfigOptions */
-   driParseOptionInfo (&screen->optionCache,
-		       __driConfigOptions, __driNConfigOptions);
-
-   /* This is first since which regions we map depends on whether or
-    * not we are using a PCI card.
-    */
-   screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
-   {
-      int ret;
-      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET,
-			    &screen->gart_buffer_offset);
-
-      if (ret) {
-	 FREE( screen );
-	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret);
-	 return NULL;
-      }
-
-      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BASE,
-			    &screen->gart_base);
-      if (ret) {
-	 FREE( screen );
-	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret);
-	 return NULL;
-      }
-
-      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR,
-			    &screen->irq);
-      if (ret) {
-	 FREE( screen );
-	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
-	 return NULL;
-      }
-      screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7);
-      screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11);
-      screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16);
-      screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18);
-      screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13);
-      screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15);
-      screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
-   }
-
-   screen->mmio.handle = dri_priv->registerHandle;
-   screen->mmio.size   = dri_priv->registerSize;
-   if ( drmMap( sPriv->fd,
-		screen->mmio.handle,
-		screen->mmio.size,
-		&screen->mmio.map ) ) {
-      FREE( screen );
-      __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
-      return NULL;
-   }
-
-   RADEONMMIO = screen->mmio.map;
-
-   screen->status.handle = dri_priv->statusHandle;
-   screen->status.size   = dri_priv->statusSize;
-   if ( drmMap( sPriv->fd,
-		screen->status.handle,
-		screen->status.size,
-		&screen->status.map ) ) {
-      drmUnmap( screen->mmio.map, screen->mmio.size );
-      FREE( screen );
-      __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
-      return NULL;
-   }
-   screen->scratch = (__volatile__ uint32_t *)
-      ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
-
-   screen->buffers = drmMapBufs( sPriv->fd );
-   if ( !screen->buffers ) {
-      drmUnmap( screen->status.map, screen->status.size );
-      drmUnmap( screen->mmio.map, screen->mmio.size );
-      FREE( screen );
-      __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
-      return NULL;
-   }
-
-   if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
-      screen->gartTextures.handle = dri_priv->gartTexHandle;
-      screen->gartTextures.size   = dri_priv->gartTexMapSize;
-      if ( drmMap( sPriv->fd,
-		   screen->gartTextures.handle,
-		   screen->gartTextures.size,
-		   (drmAddressPtr)&screen->gartTextures.map ) ) {
-	 drmUnmapBufs( screen->buffers );
-	 drmUnmap( screen->status.map, screen->status.size );
-	 drmUnmap( screen->mmio.map, screen->mmio.size );
-	 FREE( screen );
-	 __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
-	 return NULL;
-      }
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+static const __DRItexOffsetExtension r600texOffsetExtension = {
+    { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
+   r600SetTexOffset, /* +r6/r7 */
+};
 
-      screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
-   }
+static const __DRItexBufferExtension r600TexBufferExtension = {
+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+   r600SetTexBuffer,  /* +r6/r7 */
+   r600SetTexBuffer2, /* +r6/r7 */
+};
+#endif
 
+static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
+{
+   screen->device_id = device_id;
    screen->chip_flags = 0;
-   /* XXX: add more chipsets */
-   switch ( dri_priv->deviceID ) {
+   switch ( device_id ) {
    case PCI_CHIP_RADEON_LY:
    case PCI_CHIP_RADEON_LZ:
    case PCI_CHIP_RADEON_QY:
@@ -561,7 +475,6 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
       screen->chip_family = CHIP_FAMILY_RS300;
       break;
 
-
    case PCI_CHIP_R300_AD:
    case PCI_CHIP_R300_AE:
    case PCI_CHIP_R300_AF:
@@ -819,11 +732,325 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
       screen->chip_flags = RADEON_CHIPSET_TCL;
       break;
 
+   case PCI_CHIP_R600_9400:
+   case PCI_CHIP_R600_9401:
+   case PCI_CHIP_R600_9402:
+   case PCI_CHIP_R600_9403:
+   case PCI_CHIP_R600_9405:
+   case PCI_CHIP_R600_940A:
+   case PCI_CHIP_R600_940B:
+   case PCI_CHIP_R600_940F:
+      screen->chip_family = CHIP_FAMILY_R600;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV610_94C0:
+   case PCI_CHIP_RV610_94C1:
+   case PCI_CHIP_RV610_94C3:
+   case PCI_CHIP_RV610_94C4:
+   case PCI_CHIP_RV610_94C5:
+   case PCI_CHIP_RV610_94C6:
+   case PCI_CHIP_RV610_94C7:
+   case PCI_CHIP_RV610_94C8:
+   case PCI_CHIP_RV610_94C9:
+   case PCI_CHIP_RV610_94CB:
+   case PCI_CHIP_RV610_94CC:
+   case PCI_CHIP_RV610_94CD:
+      screen->chip_family = CHIP_FAMILY_RV610;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV630_9580:
+   case PCI_CHIP_RV630_9581:
+   case PCI_CHIP_RV630_9583:
+   case PCI_CHIP_RV630_9586:
+   case PCI_CHIP_RV630_9587:
+   case PCI_CHIP_RV630_9588:
+   case PCI_CHIP_RV630_9589:
+   case PCI_CHIP_RV630_958A:
+   case PCI_CHIP_RV630_958B:
+   case PCI_CHIP_RV630_958C:
+   case PCI_CHIP_RV630_958D:
+   case PCI_CHIP_RV630_958E:
+   case PCI_CHIP_RV630_958F:
+      screen->chip_family = CHIP_FAMILY_RV630;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV670_9500:
+   case PCI_CHIP_RV670_9501:
+   case PCI_CHIP_RV670_9504:
+   case PCI_CHIP_RV670_9505:
+   case PCI_CHIP_RV670_9506:
+   case PCI_CHIP_RV670_9507:
+   case PCI_CHIP_RV670_9508:
+   case PCI_CHIP_RV670_9509:
+   case PCI_CHIP_RV670_950F:
+   case PCI_CHIP_RV670_9511:
+   case PCI_CHIP_RV670_9515:
+   case PCI_CHIP_RV670_9517:
+   case PCI_CHIP_RV670_9519:
+      screen->chip_family = CHIP_FAMILY_RV670;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV620_95C0:
+   case PCI_CHIP_RV620_95C2:
+   case PCI_CHIP_RV620_95C4:
+   case PCI_CHIP_RV620_95C5:
+   case PCI_CHIP_RV620_95C6:
+   case PCI_CHIP_RV620_95C7:
+   case PCI_CHIP_RV620_95C9:
+   case PCI_CHIP_RV620_95CC:
+   case PCI_CHIP_RV620_95CD:
+   case PCI_CHIP_RV620_95CE:
+   case PCI_CHIP_RV620_95CF:
+      screen->chip_family = CHIP_FAMILY_RV620;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV635_9590:
+   case PCI_CHIP_RV635_9591:
+   case PCI_CHIP_RV635_9593:
+   case PCI_CHIP_RV635_9595:
+   case PCI_CHIP_RV635_9596:
+   case PCI_CHIP_RV635_9597:
+   case PCI_CHIP_RV635_9598:
+   case PCI_CHIP_RV635_9599:
+   case PCI_CHIP_RV635_959B:
+      screen->chip_family = CHIP_FAMILY_RV635;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RS780_9610:
+   case PCI_CHIP_RS780_9611:
+   case PCI_CHIP_RS780_9612:
+   case PCI_CHIP_RS780_9613:
+   case PCI_CHIP_RS780_9614:
+   case PCI_CHIP_RS780_9615:
+   case PCI_CHIP_RS780_9616:
+      screen->chip_family = CHIP_FAMILY_RS780;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+   case PCI_CHIP_RS880_9710:
+   case PCI_CHIP_RS880_9711:
+   case PCI_CHIP_RS880_9712:
+   case PCI_CHIP_RS880_9713:
+   case PCI_CHIP_RS880_9714:
+      screen->chip_family = CHIP_FAMILY_RS880;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV770_9440:
+   case PCI_CHIP_RV770_9441:
+   case PCI_CHIP_RV770_9442:
+   case PCI_CHIP_RV770_9443:
+   case PCI_CHIP_RV770_9444:
+   case PCI_CHIP_RV770_9446:
+   case PCI_CHIP_RV770_944A:
+   case PCI_CHIP_RV770_944B:
+   case PCI_CHIP_RV770_944C:
+   case PCI_CHIP_RV770_944E:
+   case PCI_CHIP_RV770_9450:
+   case PCI_CHIP_RV770_9452:
+   case PCI_CHIP_RV770_9456:
+   case PCI_CHIP_RV770_945A:
+   case PCI_CHIP_RV770_945B:
+   case PCI_CHIP_RV790_9460:
+   case PCI_CHIP_RV790_9462:
+   case PCI_CHIP_RV770_946A:
+   case PCI_CHIP_RV770_946B:
+   case PCI_CHIP_RV770_947A:
+   case PCI_CHIP_RV770_947B:
+      screen->chip_family = CHIP_FAMILY_RV770;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV730_9480:
+   case PCI_CHIP_RV730_9487:
+   case PCI_CHIP_RV730_9488:
+   case PCI_CHIP_RV730_9489:
+   case PCI_CHIP_RV730_948F:
+   case PCI_CHIP_RV730_9490:
+   case PCI_CHIP_RV730_9491:
+   case PCI_CHIP_RV730_9495:
+   case PCI_CHIP_RV730_9498:
+   case PCI_CHIP_RV730_949C:
+   case PCI_CHIP_RV730_949E:
+   case PCI_CHIP_RV730_949F:
+      screen->chip_family = CHIP_FAMILY_RV730;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV710_9540:
+   case PCI_CHIP_RV710_9541:
+   case PCI_CHIP_RV710_9542:
+   case PCI_CHIP_RV710_954E:
+   case PCI_CHIP_RV710_954F:
+   case PCI_CHIP_RV710_9552:
+   case PCI_CHIP_RV710_9553:
+   case PCI_CHIP_RV710_9555:
+   case PCI_CHIP_RV710_9557:
+      screen->chip_family = CHIP_FAMILY_RV710;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV740_94A0:
+   case PCI_CHIP_RV740_94A1:
+   case PCI_CHIP_RV740_94A3:
+   case PCI_CHIP_RV740_94B1:
+   case PCI_CHIP_RV740_94B3:
+   case PCI_CHIP_RV740_94B4:
+   case PCI_CHIP_RV740_94B5:
+   case PCI_CHIP_RV740_94B9:
+      screen->chip_family = CHIP_FAMILY_RV740;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
    default:
       fprintf(stderr, "unknown chip id 0x%x, can't guess.\n",
-	      dri_priv->deviceID);
+	      device_id);
+      return -1;
+   }
+
+   return 0;
+}
+
+
+/* Create the device specific screen private data struct.
+ */
+static radeonScreenPtr
+radeonCreateScreen( __DRIscreenPrivate *sPriv )
+{
+   radeonScreenPtr screen;
+   RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
+   unsigned char *RADEONMMIO = NULL;
+   int i;
+   int ret;
+   uint32_t temp = 0;
+
+   if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) {
+      fprintf(stderr,"\nERROR!  sizeof(RADEONDRIRec) does not match passed size from device driver\n");
+      return GL_FALSE;
+   }
+
+   /* Allocate the private area */
+   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
+   if ( !screen ) {
+      __driUtilMessage("%s: Could not allocate memory for screen structure",
+		       __FUNCTION__);
       return NULL;
    }
+
+   radeon_init_debug();
+
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo (&screen->optionCache,
+		       __driConfigOptions, __driNConfigOptions);
+
+   /* This is first since which regions we map depends on whether or
+    * not we are using a PCI card.
+    */
+   screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
+   {
+      int ret;
+
+      ret = radeonGetParam(sPriv, RADEON_PARAM_GART_BUFFER_OFFSET,
+			    &screen->gart_buffer_offset);
+
+      if (ret) {
+	 FREE( screen );
+	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret);
+	 return NULL;
+      }
+
+      ret = radeonGetParam(sPriv, RADEON_PARAM_GART_BASE,
+			    &screen->gart_base);
+      if (ret) {
+	 FREE( screen );
+	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret);
+	 return NULL;
+      }
+
+      ret = radeonGetParam(sPriv, RADEON_PARAM_IRQ_NR,
+			    &screen->irq);
+      if (ret) {
+	 FREE( screen );
+	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
+	 return NULL;
+      }
+      screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7);
+      screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11);
+      screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16);
+      screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18);
+      screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13);
+      screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15);
+      screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
+      screen->drmSupportsOcclusionQueries = (sPriv->drm_version.minor >= 30);
+   }
+
+   ret = radeon_set_screen_flags(screen, dri_priv->deviceID);
+   if (ret == -1)
+     return NULL;
+
+   screen->mmio.handle = dri_priv->registerHandle;
+   screen->mmio.size   = dri_priv->registerSize;
+   if ( drmMap( sPriv->fd,
+		screen->mmio.handle,
+		screen->mmio.size,
+		&screen->mmio.map ) ) {
+     FREE( screen );
+     __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
+     return NULL;
+   }
+
+   RADEONMMIO = screen->mmio.map;
+
+   screen->status.handle = dri_priv->statusHandle;
+   screen->status.size   = dri_priv->statusSize;
+   if ( drmMap( sPriv->fd,
+		screen->status.handle,
+		screen->status.size,
+		&screen->status.map ) ) {
+     drmUnmap( screen->mmio.map, screen->mmio.size );
+     FREE( screen );
+     __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
+     return NULL;
+   }
+   if (screen->chip_family < CHIP_FAMILY_R600)
+	   screen->scratch = (__volatile__ uint32_t *)
+		   ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
+   else
+	   screen->scratch = (__volatile__ uint32_t *)
+		   ((GLubyte *)screen->status.map + R600_SCRATCH_REG_OFFSET);
+
+   screen->buffers = drmMapBufs( sPriv->fd );
+   if ( !screen->buffers ) {
+     drmUnmap( screen->status.map, screen->status.size );
+     drmUnmap( screen->mmio.map, screen->mmio.size );
+     FREE( screen );
+     __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
+     return NULL;
+   }
+
+   if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
+     screen->gartTextures.handle = dri_priv->gartTexHandle;
+     screen->gartTextures.size   = dri_priv->gartTexMapSize;
+     if ( drmMap( sPriv->fd,
+		  screen->gartTextures.handle,
+		  screen->gartTextures.size,
+		  (drmAddressPtr)&screen->gartTextures.map ) ) {
+       drmUnmapBufs( screen->buffers );
+       drmUnmap( screen->status.map, screen->status.size );
+       drmUnmap( screen->mmio.map, screen->mmio.size );
+       FREE( screen );
+       __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
+       return NULL;
+    }
+
+     screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
+   }
+
    if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) &&
        sPriv->ddx_version.minor < 2) {
       fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n");
@@ -836,35 +1063,57 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
    }
 
    if (getenv("R300_NO_TCL"))
-     screen->chip_flags &= ~RADEON_CHIPSET_TCL;
+	   screen->chip_flags &= ~RADEON_CHIPSET_TCL;
 
    if (screen->chip_family <= CHIP_FAMILY_RS200)
-      screen->chip_flags |= RADEON_CLASS_R100;
+	   screen->chip_flags |= RADEON_CLASS_R100;
    else if (screen->chip_family <= CHIP_FAMILY_RV280)
-      screen->chip_flags |= RADEON_CLASS_R200;
+	   screen->chip_flags |= RADEON_CLASS_R200;
+   else if (screen->chip_family <= CHIP_FAMILY_RV570)
+	   screen->chip_flags |= RADEON_CLASS_R300;
    else
-      screen->chip_flags |= RADEON_CLASS_R300;
+	   screen->chip_flags |= RADEON_CLASS_R600;
 
    screen->cpp = dri_priv->bpp / 8;
    screen->AGPMode = dri_priv->AGPMode;
 
-   ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION,
-                         &temp);
-   if (ret) {
-       if (screen->chip_family < CHIP_FAMILY_RS600)
-	   screen->fbLocation      = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
-       else {
-           FREE( screen );
-           fprintf(stderr, "Unable to get fb location need newer drm\n");
-           return NULL;
+   ret = radeonGetParam(sPriv, RADEON_PARAM_FB_LOCATION, &temp);
+
+   /* +r6/r7 */
+   if(screen->chip_family >= CHIP_FAMILY_R600)
+   {
+       if (ret)
+       {
+            FREE( screen );
+            fprintf(stderr, "Unable to get fb location need newer drm\n");
+            return NULL;
+       }
+       else
+       {
+            screen->fbLocation = (temp & 0xffff) << 24;
        }
-   } else {
-       screen->fbLocation = (temp & 0xffff) << 16;
+   }
+   else
+   {
+        if (ret)
+        {
+            if (screen->chip_family < CHIP_FAMILY_RS600 && !screen->kernel_mm)
+	            screen->fbLocation      = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
+            else
+            {
+                FREE( screen );
+                fprintf(stderr, "Unable to get fb location need newer drm\n");
+                return NULL;
+            }
+        }
+        else
+        {
+            screen->fbLocation = (temp & 0xffff) << 16;
+        }
    }
 
-   if (screen->chip_family >= CHIP_FAMILY_R300) {
-       ret = radeonGetParam( sPriv->fd, RADEON_PARAM_NUM_GB_PIPES,
-			     &temp);
+   if (IS_R300_CLASS(screen)) {
+       ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp);
        if (ret) {
 	   fprintf(stderr, "Unable to get num_pipes, need newer drm\n");
 	   switch (screen->chip_family) {
@@ -902,6 +1151,14 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
 	   break;
        }
 
+       if ( sPriv->drm_version.minor >= 31 ) {
+	       ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_Z_PIPES, &temp);
+	       if (ret)
+		       screen->num_z_pipes = 2;
+	       else
+		       screen->num_z_pipes = temp;
+       } else
+	       screen->num_z_pipes = 2;
    }
 
    if ( sPriv->drm_version.minor >= 10 ) {
@@ -971,7 +1228,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
 
 #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
    if (IS_R200_CLASS(screen))
-       screen->extensions[i++] = &r200AllocateExtension.base;
+      screen->extensions[i++] = &r200AllocateExtension.base;
 
    screen->extensions[i++] = &r200texOffsetExtension.base;
 #endif
@@ -980,11 +1237,173 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
    screen->extensions[i++] = &r300texOffsetExtension.base;
 #endif
 
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+   screen->extensions[i++] = &r600texOffsetExtension.base;
+#endif
+
    screen->extensions[i++] = NULL;
    sPriv->extensions = screen->extensions;
 
    screen->driScreen = sPriv;
    screen->sarea_priv_offset = dri_priv->sarea_priv_offset;
+   screen->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
+					       screen->sarea_priv_offset);
+
+   screen->bom = radeon_bo_manager_legacy_ctor(screen);
+   if (screen->bom == NULL) {
+     free(screen);
+     return NULL;
+   }
+
+   return screen;
+}
+
+static radeonScreenPtr
+radeonCreateScreen2(__DRIscreenPrivate *sPriv)
+{
+   radeonScreenPtr screen;
+   int i;
+   int ret;
+   uint32_t device_id = 0;
+   uint32_t temp = 0;
+
+   /* Allocate the private area */
+   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
+   if ( !screen ) {
+      __driUtilMessage("%s: Could not allocate memory for screen structure",
+		       __FUNCTION__);
+      fprintf(stderr, "leaving here\n");
+      return NULL;
+   }
+
+   radeon_init_debug();
+
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo (&screen->optionCache,
+		       __driConfigOptions, __driNConfigOptions);
+
+   screen->kernel_mm = 1;
+   screen->chip_flags = 0;
+
+   /* if we have kms we can support all of these */
+   screen->drmSupportsCubeMapsR200 = 1;
+   screen->drmSupportsBlendColor = 1;
+   screen->drmSupportsTriPerf = 1;
+   screen->drmSupportsFragShader = 1;
+   screen->drmSupportsPointSprites = 1;
+   screen->drmSupportsCubeMapsR100 = 1;
+   screen->drmSupportsVertexProgram = 1;
+   screen->drmSupportsOcclusionQueries = 1;
+   screen->irq = 1;
+
+   ret = radeonGetParam(sPriv, RADEON_PARAM_DEVICE_ID, &device_id);
+   if (ret) {
+     FREE( screen );
+     fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_DEVICE_ID): %d\n", ret);
+     return NULL;
+   }
+
+   ret = radeon_set_screen_flags(screen, device_id);
+   if (ret == -1)
+     return NULL;
+
+   if (getenv("R300_NO_TCL"))
+	   screen->chip_flags &= ~RADEON_CHIPSET_TCL;
+
+   if (screen->chip_family <= CHIP_FAMILY_RS200)
+	   screen->chip_flags |= RADEON_CLASS_R100;
+   else if (screen->chip_family <= CHIP_FAMILY_RV280)
+	   screen->chip_flags |= RADEON_CLASS_R200;
+   else if (screen->chip_family <= CHIP_FAMILY_RV570)
+	   screen->chip_flags |= RADEON_CLASS_R300;
+   else
+	   screen->chip_flags |= RADEON_CLASS_R600;
+
+   if (IS_R300_CLASS(screen)) {
+       ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp);
+       if (ret) {
+	   fprintf(stderr, "Unable to get num_pipes, need newer drm\n");
+	   switch (screen->chip_family) {
+	   case CHIP_FAMILY_R300:
+	   case CHIP_FAMILY_R350:
+	       screen->num_gb_pipes = 2;
+	       break;
+	   case CHIP_FAMILY_R420:
+	   case CHIP_FAMILY_R520:
+	   case CHIP_FAMILY_R580:
+	   case CHIP_FAMILY_RV560:
+	   case CHIP_FAMILY_RV570:
+	       screen->num_gb_pipes = 4;
+	       break;
+	   case CHIP_FAMILY_RV350:
+	   case CHIP_FAMILY_RV515:
+	   case CHIP_FAMILY_RV530:
+	   case CHIP_FAMILY_RV410:
+	   default:
+	       screen->num_gb_pipes = 1;
+	       break;
+	   }
+       } else {
+	   screen->num_gb_pipes = temp;
+       }
+
+       /* pipe overrides */
+       switch (device_id) {
+       case PCI_CHIP_R300_AD: /* 9500 with 1 quadpipe verified by: Reid Linnemann <lreid@cs.okstate.edu> */
+       case PCI_CHIP_RV410_5E4C: /* RV410 SE only have 1 quadpipe */
+       case PCI_CHIP_RV410_5E4F: /* RV410 SE only have 1 quadpipe */
+	   screen->num_gb_pipes = 1;
+	   break;
+       default:
+	   break;
+       }
+
+       ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_Z_PIPES, &temp);
+       if (ret)
+	       screen->num_z_pipes = 2;
+       else
+	       screen->num_z_pipes = temp;
+
+   }
+
+   i = 0;
+   screen->extensions[i++] = &driCopySubBufferExtension.base;
+   screen->extensions[i++] = &driFrameTrackingExtension.base;
+   screen->extensions[i++] = &driReadDrawableExtension;
+
+   if ( screen->irq != 0 ) {
+       screen->extensions[i++] = &driSwapControlExtension.base;
+       screen->extensions[i++] = &driMediaStreamCounterExtension.base;
+   }
+
+#if !RADEON_COMMON
+   screen->extensions[i++] = &radeonTexBufferExtension.base;
+#endif
+
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+   if (IS_R200_CLASS(screen))
+       screen->extensions[i++] = &r200AllocateExtension.base;
+
+   screen->extensions[i++] = &r200TexBufferExtension.base;
+#endif
+
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+   screen->extensions[i++] = &r300TexBufferExtension.base;
+#endif
+
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+   screen->extensions[i++] = &r600TexBufferExtension.base;
+#endif
+
+   screen->extensions[i++] = NULL;
+   sPriv->extensions = screen->extensions;
+
+   screen->driScreen = sPriv;
+   screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd);
+   if (screen->bom == NULL) {
+       free(screen);
+       return NULL;
+   }
    return screen;
 }
 
@@ -993,23 +1412,32 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
 static void
 radeonDestroyScreen( __DRIscreenPrivate *sPriv )
 {
-   radeonScreenPtr screen = (radeonScreenPtr)sPriv->private;
+    radeonScreenPtr screen = (radeonScreenPtr)sPriv->private;
 
-   if (!screen)
-      return;
+    if (!screen)
+        return;
 
-   if ( screen->gartTextures.map ) {
-      drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
-   }
-   drmUnmapBufs( screen->buffers );
-   drmUnmap( screen->status.map, screen->status.size );
-   drmUnmap( screen->mmio.map, screen->mmio.size );
+    if (screen->kernel_mm) {
+#ifdef RADEON_BO_TRACK
+        radeon_tracker_print(&screen->bom->tracker, stderr);
+#endif
+        radeon_bo_manager_gem_dtor(screen->bom);
+    } else {
+        radeon_bo_manager_legacy_dtor(screen->bom);
+
+        if ( screen->gartTextures.map ) {
+            drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
+        }
+        drmUnmapBufs( screen->buffers );
+        drmUnmap( screen->status.map, screen->status.size );
+        drmUnmap( screen->mmio.map, screen->mmio.size );
+    }
 
-   /* free all option information */
-   driDestroyOptionInfo (&screen->optionCache);
+    /* free all option information */
+    driDestroyOptionInfo (&screen->optionCache);
 
-   FREE( screen );
-   sPriv->private = NULL;
+    FREE( screen );
+    sPriv->private = NULL;
 }
 
 
@@ -1018,16 +1446,21 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv )
 static GLboolean
 radeonInitDriver( __DRIscreenPrivate *sPriv )
 {
-   sPriv->private = (void *) radeonCreateScreen( sPriv );
-   if ( !sPriv->private ) {
-      radeonDestroyScreen( sPriv );
-      return GL_FALSE;
-   }
+    if (sPriv->dri2.enabled) {
+        sPriv->private = (void *) radeonCreateScreen2( sPriv );
+    } else {
+        sPriv->private = (void *) radeonCreateScreen( sPriv );
+    }
+    if ( !sPriv->private ) {
+        radeonDestroyScreen( sPriv );
+        return GL_FALSE;
+    }
 
-   return GL_TRUE;
+    return GL_TRUE;
 }
 
 
+
 /**
  * Create the Mesa framebuffer and renderbuffers for a given window/drawable.
  *
@@ -1040,132 +1473,112 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
                     const __GLcontextModes *mesaVis,
                     GLboolean isPixmap )
 {
-   radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private;
+    radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private;
 
-   if (isPixmap) {
+    const GLboolean swDepth = GL_FALSE;
+    const GLboolean swAlpha = GL_FALSE;
+    const GLboolean swAccum = mesaVis->accumRedBits > 0;
+    const GLboolean swStencil = mesaVis->stencilBits > 0 &&
+	mesaVis->depthBits != 24;
+    GLenum rgbFormat;
+    struct radeon_framebuffer *rfb;
+
+    if (isPixmap)
       return GL_FALSE; /* not implemented */
-   }
-   else {
-      const GLboolean swDepth = GL_FALSE;
-      const GLboolean swAlpha = GL_FALSE;
-      const GLboolean swAccum = mesaVis->accumRedBits > 0;
-      const GLboolean swStencil = mesaVis->stencilBits > 0 &&
-         mesaVis->depthBits != 24;
-      struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
-
-      /* front color renderbuffer */
-      {
-         driRenderbuffer *frontRb
-            = driNewRenderbuffer(GL_RGBA,
-                                 driScrnPriv->pFB + screen->frontOffset,
-                                 screen->cpp,
-                                 screen->frontOffset, screen->frontPitch,
-                                 driDrawPriv);
-         radeonSetSpanFunctions(frontRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
-      }
 
-      /* back color renderbuffer */
-      if (mesaVis->doubleBufferMode) {
-         driRenderbuffer *backRb
-            = driNewRenderbuffer(GL_RGBA,
-                                 driScrnPriv->pFB + screen->backOffset,
-                                 screen->cpp,
-                                 screen->backOffset, screen->backPitch,
-                                 driDrawPriv);
-         radeonSetSpanFunctions(backRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
-      }
+    rfb = CALLOC_STRUCT(radeon_framebuffer);
+    if (!rfb)
+      return GL_FALSE;
 
-      /* depth renderbuffer */
-      if (mesaVis->depthBits == 16) {
-         driRenderbuffer *depthRb
-            = driNewRenderbuffer(GL_DEPTH_COMPONENT16,
-                                 driScrnPriv->pFB + screen->depthOffset,
-                                 screen->cpp,
-                                 screen->depthOffset, screen->depthPitch,
-                                 driDrawPriv);
-         radeonSetSpanFunctions(depthRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
-	 depthRb->depthHasSurface = screen->depthHasSurface;
-      }
-      else if (mesaVis->depthBits == 24) {
-         driRenderbuffer *depthRb
-            = driNewRenderbuffer(GL_DEPTH_COMPONENT24,
-                                 driScrnPriv->pFB + screen->depthOffset,
-                                 screen->cpp,
-                                 screen->depthOffset, screen->depthPitch,
-                                 driDrawPriv);
-         radeonSetSpanFunctions(depthRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
-	 depthRb->depthHasSurface = screen->depthHasSurface;
-      }
+    _mesa_initialize_framebuffer(&rfb->base, mesaVis);
+
+    if (mesaVis->redBits == 5)
+        rgbFormat = GL_RGB5;
+    else if (mesaVis->alphaBits == 0)
+        rgbFormat = GL_RGB8;
+    else
+        rgbFormat = GL_RGBA8;
+
+    /* front color renderbuffer */
+    rfb->color_rb[0] = radeon_create_renderbuffer(rgbFormat, driDrawPriv);
+    _mesa_add_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT, &rfb->color_rb[0]->base);
+    rfb->color_rb[0]->has_surface = 1;
+
+    /* back color renderbuffer */
+    if (mesaVis->doubleBufferMode) {
+      rfb->color_rb[1] = radeon_create_renderbuffer(rgbFormat, driDrawPriv);
+	_mesa_add_renderbuffer(&rfb->base, BUFFER_BACK_LEFT, &rfb->color_rb[1]->base);
+	rfb->color_rb[1]->has_surface = 1;
+    }
 
-      /* stencil renderbuffer */
-      if (mesaVis->stencilBits > 0 && !swStencil) {
-         driRenderbuffer *stencilRb
-            = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT,
-                                 driScrnPriv->pFB + screen->depthOffset,
-                                 screen->cpp,
-                                 screen->depthOffset, screen->depthPitch,
-                                 driDrawPriv);
-         radeonSetSpanFunctions(stencilRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
-	 stencilRb->depthHasSurface = screen->depthHasSurface;
+    if (mesaVis->depthBits == 24) {
+      if (mesaVis->stencilBits == 8) {
+	struct radeon_renderbuffer *depthStencilRb = radeon_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT, driDrawPriv);
+	_mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depthStencilRb->base);
+	_mesa_add_renderbuffer(&rfb->base, BUFFER_STENCIL, &depthStencilRb->base);
+	depthStencilRb->has_surface = screen->depthHasSurface;
+      } else {
+	/* depth renderbuffer */
+	struct radeon_renderbuffer *depth = radeon_create_renderbuffer(GL_DEPTH_COMPONENT24, driDrawPriv);
+	_mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depth->base);
+	depth->has_surface = screen->depthHasSurface;
       }
+    } else if (mesaVis->depthBits == 16) {
+      /* just 16-bit depth buffer, no hw stencil */
+	struct radeon_renderbuffer *depth = radeon_create_renderbuffer(GL_DEPTH_COMPONENT16, driDrawPriv);
+	_mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depth->base);
+	depth->has_surface = screen->depthHasSurface;
+    }
 
-      _mesa_add_soft_renderbuffers(fb,
-                                   GL_FALSE, /* color */
-                                   swDepth,
-                                   swStencil,
-                                   swAccum,
-                                   swAlpha,
-                                   GL_FALSE /* aux */);
-      driDrawPriv->driverPrivate = (void *) fb;
+    _mesa_add_soft_renderbuffers(&rfb->base,
+	    GL_FALSE, /* color */
+	    swDepth,
+	    swStencil,
+	    swAccum,
+	    swAlpha,
+	    GL_FALSE /* aux */);
+    driDrawPriv->driverPrivate = (void *) rfb;
 
-      return (driDrawPriv->driverPrivate != NULL);
-   }
+    return (driDrawPriv->driverPrivate != NULL);
 }
 
 
-static void
-radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
+static void radeon_cleanup_renderbuffers(struct radeon_framebuffer *rfb)
 {
-   _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL);
-}
+	struct radeon_renderbuffer *rb;
 
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-/**
- * Choose the appropriate CreateContext function based on the chipset.
- * Eventually, all drivers will go through this process.
- */
-static GLboolean radeonCreateContext(const __GLcontextModes * glVisual,
-				     __DRIcontextPrivate * driContextPriv,
-				     void *sharedContextPriv)
-{
-	__DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
-	radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
-
-	if (IS_R300_CLASS(screen))
-		return r300CreateContext(glVisual, driContextPriv, sharedContextPriv);
-        return GL_FALSE;
+	rb = rfb->color_rb[0];
+	if (rb && rb->bo) {
+		radeon_bo_unref(rb->bo);
+		rb->bo = NULL;
+	}
+	rb = rfb->color_rb[1];
+	if (rb && rb->bo) {
+		radeon_bo_unref(rb->bo);
+		rb->bo = NULL;
+	}
+	rb = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH);
+	if (rb && rb->bo) {
+		radeon_bo_unref(rb->bo);
+		rb->bo = NULL;
+	}
 }
 
-/**
- * Choose the appropriate DestroyContext function based on the chipset.
- */
-static void radeonDestroyContext(__DRIcontextPrivate * driContextPriv)
+void
+radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
 {
-	radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
-
-	if (IS_R300_CLASS(radeon->radeonScreen))
-		return r300DestroyContext(driContextPriv);
+    struct radeon_framebuffer *rfb;
+    if (!driDrawPriv)
+	return;
+
+    rfb = (void*)driDrawPriv->driverPrivate;
+    if (!rfb)
+	return;
+    radeon_cleanup_renderbuffers(rfb);
+    _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL);
 }
 
 
-#endif
-
-
 /**
  * This is the driver specific part of the createNewScreen entry point.
  *
@@ -1191,6 +1604,11 @@ radeonInitScreen(__DRIscreenPrivate *psp)
    static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
    static const __DRIversion dri_expected = { 4, 0, 0 };
    static const __DRIversion drm_expected = { 1, 24, 0 };
+#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+   static const char *driver_name = "R600";
+   static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
+   static const __DRIversion dri_expected = { 4, 0, 0 };
+   static const __DRIversion drm_expected = { 1, 24, 0 };
 #endif
    RADEONDRIPtr dri_priv = (RADEONDRIPtr) psp->pDevPriv;
 
@@ -1218,20 +1636,112 @@ radeonInitScreen(__DRIscreenPrivate *psp)
    driInitSingleExtension( NULL, NV_vp_extension );
    driInitSingleExtension( NULL, ATI_fs_extension );
    driInitExtensions( NULL, point_extensions, GL_FALSE );
-#elif defined(RADEON_COMMON_FOR_R300)
+#elif (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
    driInitSingleExtension( NULL, gl_20_extension );
 #endif
 
    if (!radeonInitDriver(psp))
        return NULL;
 
+   /* for now fill in all modes */
    return radeonFillInModes( psp,
 			     dri_priv->bpp,
 			     (dri_priv->bpp == 16) ? 16 : 24,
-			     (dri_priv->bpp == 16) ? 0  : 8,
-			     (dri_priv->backOffset != dri_priv->depthOffset) );
+			     (dri_priv->bpp == 16) ? 0  : 8, 1);
 }
+#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
 
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ * Called when using DRI2.
+ *
+ * \return the __GLcontextModes supported by this driver
+ */
+static const
+__DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp)
+{
+   GLenum fb_format[3];
+   GLenum fb_type[3];
+   /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
+    * support pageflipping at all.
+    */
+   static const GLenum back_buffer_modes[] = {
+     GLX_NONE, GLX_SWAP_UNDEFINED_OML, /*, GLX_SWAP_COPY_OML*/
+   };
+   uint8_t depth_bits[4], stencil_bits[4], msaa_samples_array[1];
+   int color;
+   __DRIconfig **configs = NULL;
+
+   /* Calling driInitExtensions here, with a NULL context pointer,
+    * does not actually enable the extensions.  It just makes sure
+    * that all the dispatch offsets for all the extensions that
+    * *might* be enables are known.  This is needed because the
+    * dispatch offsets need to be known when _mesa_context_create
+    * is called, but we can't enable the extensions until we have a
+    * context pointer.
+    *
+    * Hello chicken.  Hello egg.  How are you two today?
+    */
+   driInitExtensions( NULL, card_extensions, GL_FALSE );
+   driInitExtensions( NULL, mm_extensions, GL_FALSE );
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+   driInitExtensions( NULL, blend_extensions, GL_FALSE );
+   driInitSingleExtension( NULL, ARB_vp_extension );
+   driInitSingleExtension( NULL, NV_vp_extension );
+   driInitSingleExtension( NULL, ATI_fs_extension );
+   driInitExtensions( NULL, point_extensions, GL_FALSE );
+#elif (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
+   driInitSingleExtension( NULL, gl_20_extension );
+#endif
+
+   if (!radeonInitDriver(psp)) {
+       return NULL;
+    }
+   depth_bits[0] = 0;
+   stencil_bits[0] = 0;
+   depth_bits[1] = 16;
+   stencil_bits[1] = 0;
+   depth_bits[2] = 24;
+   stencil_bits[2] = 0;
+   depth_bits[3] = 24;
+   stencil_bits[3] = 8;
+
+   msaa_samples_array[0] = 0;
+
+   fb_format[0] = GL_RGB;
+   fb_type[0] = GL_UNSIGNED_SHORT_5_6_5;
+
+   fb_format[1] = GL_BGR;
+   fb_type[1] = GL_UNSIGNED_INT_8_8_8_8_REV;
+
+   fb_format[2] = GL_BGRA;
+   fb_type[2] = GL_UNSIGNED_INT_8_8_8_8_REV;
+
+   for (color = 0; color < ARRAY_SIZE(fb_format); color++) {
+      __DRIconfig **new_configs;
+
+      new_configs = driCreateConfigs(fb_format[color], fb_type[color],
+				     depth_bits,
+				     stencil_bits,
+				     ARRAY_SIZE(depth_bits),
+				     back_buffer_modes,
+				     ARRAY_SIZE(back_buffer_modes),
+				     msaa_samples_array,
+				     ARRAY_SIZE(msaa_samples_array));
+      if (configs == NULL)
+	 configs = new_configs;
+      else
+	 configs = driConcatConfigs(configs, new_configs);
+   }
+
+   if (configs == NULL) {
+      fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
+              __LINE__);
+      return NULL;
+   }
+
+   return (const __DRIconfig **)configs;
+}
 
 /**
  * Get information about previous buffer swaps.
@@ -1239,36 +1749,42 @@ radeonInitScreen(__DRIscreenPrivate *psp)
 static int
 getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
 {
-#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300))
-   radeonContextPtr  rmesa;
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
-   r200ContextPtr  rmesa;
-#endif
+    struct radeon_framebuffer *rfb;
 
-   if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL)
-	|| (dPriv->driContextPriv->driverPrivate == NULL)
-	|| (sInfo == NULL) ) {
-      return -1;
+    if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL)
+	 || (dPriv->driContextPriv->driverPrivate == NULL)
+	 || (sInfo == NULL) ) {
+	return -1;
    }
 
-   rmesa = dPriv->driContextPriv->driverPrivate;
-   sInfo->swap_count = rmesa->swap_count;
-   sInfo->swap_ust = rmesa->swap_ust;
-   sInfo->swap_missed_count = rmesa->swap_missed_count;
+    rfb = dPriv->driverPrivate;
+    sInfo->swap_count = rfb->swap_count;
+    sInfo->swap_ust = rfb->swap_ust;
+    sInfo->swap_missed_count = rfb->swap_missed_count;
 
    sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0)
-       ? driCalculateSwapUsage( dPriv, 0, rmesa->swap_missed_ust )
+       ? driCalculateSwapUsage( dPriv, 0, rfb->swap_missed_ust )
        : 0.0;
 
    return 0;
 }
 
-#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300))
 const struct __DriverAPIRec driDriverAPI = {
    .InitScreen      = radeonInitScreen,
    .DestroyScreen   = radeonDestroyScreen,
-   .CreateContext   = radeonCreateContext,
+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+   .CreateContext   = r200CreateContext,
+   .DestroyContext  = r200DestroyContext,
+#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+   .CreateContext   = r600CreateContext,
+   .DestroyContext  = radeonDestroyContext,
+#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+   .CreateContext   = r300CreateContext,
    .DestroyContext  = radeonDestroyContext,
+#else
+   .CreateContext   = r100CreateContext,
+   .DestroyContext  = radeonDestroyContext,
+#endif
    .CreateBuffer    = radeonCreateBuffer,
    .DestroyBuffer   = radeonDestroyBuffer,
    .SwapBuffers     = radeonSwapBuffers,
@@ -1280,23 +1796,7 @@ const struct __DriverAPIRec driDriverAPI = {
    .WaitForSBC      = NULL,
    .SwapBuffersMSC  = NULL,
    .CopySubBuffer   = radeonCopySubBuffer,
+    /* DRI2 */
+   .InitScreen2     = radeonInitScreen2,
 };
-#else
-const struct __DriverAPIRec driDriverAPI = {
-   .InitScreen      = radeonInitScreen,
-   .DestroyScreen   = radeonDestroyScreen,
-   .CreateContext   = r200CreateContext,
-   .DestroyContext  = r200DestroyContext,
-   .CreateBuffer    = radeonCreateBuffer,
-   .DestroyBuffer   = radeonDestroyBuffer,
-   .SwapBuffers     = r200SwapBuffers,
-   .MakeCurrent     = r200MakeCurrent,
-   .UnbindContext   = r200UnbindContext,
-   .GetSwapInfo     = getSwapInfo,
-   .GetDrawableMSC  = driDrawableGetMSC32,
-   .WaitForMSC      = driWaitForMSC32,
-   .WaitForSBC      = NULL,
-   .SwapBuffersMSC  = NULL,
-   .CopySubBuffer   = r200CopySubBuffer,
-};
-#endif
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
index b84c70bfae9..15744e88284 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
@@ -54,11 +54,12 @@ typedef struct {
    drmAddress map;			/* Mapping of the DRM region */
 } radeonRegionRec, *radeonRegionPtr;
 
-typedef struct {
+typedef struct radeon_screen {
    int chip_family;
    int chip_flags;
    int cpp;
    int card_type;
+   int device_id; /* PCI ID */
    int AGPMode;
    unsigned int irq;			/* IRQ number (0 means none) */
 
@@ -98,14 +99,19 @@ typedef struct {
    GLboolean drmSupportsPointSprites;   /* need radeon kernel module >= 1.13 */
    GLboolean drmSupportsCubeMapsR100;   /* need radeon kernel module >= 1.15 */
    GLboolean drmSupportsVertexProgram;  /* need radeon kernel module >= 1.25 */
+   GLboolean drmSupportsOcclusionQueries; /* need radeon kernel module >= 1.30 */
    GLboolean depthHasSurface;
 
    /* Configuration cache with default values for all contexts */
    driOptionCache optionCache;
 
-   const __DRIextension *extensions[8];
+   const __DRIextension *extensions[16];
 
    int num_gb_pipes;
+   int num_z_pipes;
+   int kernel_mm;
+   drm_radeon_sarea_t *sarea;	/* Private SAREA data */
+   struct radeon_bo_manager *bom;
 } radeonScreenRec, *radeonScreenPtr;
 
 #define IS_R100_CLASS(screen) \
@@ -114,5 +120,8 @@ typedef struct {
 	((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R200)
 #define IS_R300_CLASS(screen) \
 	((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R300)
+#define IS_R600_CLASS(screen) \
+	((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R600)
 
+extern void radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv);
 #endif /* __RADEON_SCREEN_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
index 12051ff1c81..4e100d854ed 100644
--- a/src/mesa/drivers/dri/radeon/radeon_span.c
+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
@@ -43,46 +43,222 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/glheader.h"
 #include "swrast/swrast.h"
 
-#include "radeon_context.h"
-#include "radeon_ioctl.h"
-#include "radeon_state.h"
+#include "radeon_common.h"
+#include "radeon_lock.h"
 #include "radeon_span.h"
-#include "radeon_tex.h"
-
-#include "drirenderbuffer.h"
 
 #define DBG 0
 
+static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
+
+
+/* r200 depth buffer is always tiled - this is the formula
+   according to the docs unless I typo'ed in it
+*/
+#if defined(RADEON_COMMON_FOR_R200)
+static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
+				 GLint x, GLint y)
+{
+    GLubyte *ptr = rrb->bo->ptr;
+    GLint offset;
+    if (rrb->has_surface) {
+	offset = x * rrb->cpp + y * rrb->pitch;
+    } else {
+	GLuint b;
+	offset = 0;
+	b = (((y  >> 4) * (rrb->pitch >> 8) + (x >> 6)));
+	offset += (b >> 1) << 12;
+	offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
+	offset += ((y >> 2) & 0x3) << 9;
+	offset += ((x >> 3) & 0x1) << 8;
+	offset += ((x >> 4) & 0x3) << 6;
+	offset += ((x >> 2) & 0x1) << 5;
+	offset += ((y >> 1) & 0x1) << 4;
+	offset += ((x >> 1) & 0x1) << 3;
+	offset += (y & 0x1) << 2;
+	offset += (x & 0x1) << 1;
+    }
+    return &ptr[offset];
+}
+
+static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
+				 GLint x, GLint y)
+{
+    GLubyte *ptr = rrb->bo->ptr;
+    GLint offset;
+    if (rrb->has_surface) {
+	offset = x * rrb->cpp + y * rrb->pitch;
+    } else {
+	GLuint b;
+	offset = 0;
+	b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
+	offset += (b >> 1) << 12;
+	offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
+	offset += ((y >> 2) & 0x3) << 9;
+	offset += ((x >> 2) & 0x1) << 8;
+	offset += ((x >> 3) & 0x3) << 6;
+	offset += ((y >> 1) & 0x1) << 5;
+	offset += ((x >> 1) & 0x1) << 4;
+	offset += (y & 0x1) << 3;
+	offset += (x & 0x1) << 2;
+    }
+    return &ptr[offset];
+}
+#endif
+
+/* radeon tiling on r300-r500 has 4 states,
+   macro-linear/micro-linear
+   macro-linear/micro-tiled
+   macro-tiled /micro-linear
+   macro-tiled /micro-tiled
+   1 byte surface 
+   2 byte surface - two types - we only provide 8x2 microtiling
+   4 byte surface
+   8/16 byte (unused)
+*/
+static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
+			     GLint x, GLint y)
+{
+    GLubyte *ptr = rrb->bo->ptr;
+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
+    GLint offset;
+
+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
+        offset = x * rrb->cpp + y * rrb->pitch;
+    } else {
+        offset = 0;
+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+	    if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
+		offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
+		offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
+		offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
+		offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
+		offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
+		offset += ((y >> 1) & 0x1) << 6;
+		offset += ((x >> 2) & 0x1) << 5;
+		offset += (y & 1) << 4;
+		offset += (x & 3) << 2;
+            } else {
+		offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
+		offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
+		offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
+		offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
+		offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
+		offset += (y & 1) << 6;
+		offset += (x & 15) << 2;
+            }
+        } else {
+	    offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
+	    offset += (y & 1) << 4;
+	    offset += (x & 3) << 2;
+        }
+    }
+    return &ptr[offset];
+}
+
+static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
+				     GLint x, GLint y)
+{
+    GLubyte *ptr = rrb->bo->ptr;
+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
+    GLint offset;
+
+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
+        offset = x * rrb->cpp + y * rrb->pitch;
+    } else {
+        offset = 0;
+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
+		offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
+		offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
+		offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
+		offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
+		offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
+		offset += ((y >> 1) & 0x1) << 6;
+		offset += ((x >> 3) & 0x1) << 5;
+		offset += (y & 1) << 4;
+		offset += (x & 3) << 2;
+            } else {
+		offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
+		offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
+		offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
+		offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
+		offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
+		offset += (y & 1) << 6;
+		offset += ((x >> 4) & 0x1) << 5;
+                offset += (x & 15) << 2;
+            }
+        } else {
+	    offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
+	    offset += (y & 0x1) << 4;
+	    offset += (x & 0x7) << 1;
+        }
+    }
+    return &ptr[offset];
+}
+
+#ifndef COMPILE_R300
+static uint32_t
+z24s8_to_s8z24(uint32_t val)
+{
+   return (val << 24) | (val >> 8);
+}
+
+static uint32_t
+s8z24_to_z24s8(uint32_t val)
+{
+   return (val >> 24) | (val << 8);
+}
+#endif
+
 /*
  * Note that all information needed to access pixels in a renderbuffer
  * should be obtained through the gl_renderbuffer parameter, not per-context
  * information.
  */
 #define LOCAL_VARS						\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
-   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
-   const GLuint bottom = dPriv->h - 1;				\
-   GLubyte *buf = (GLubyte *) drb->flippedData			\
-      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
-   GLuint p;							\
-   (void) p;
+   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
+   struct radeon_renderbuffer *rrb = (void *) rb;		\
+   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
+   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
+   unsigned int num_cliprects;						\
+   struct drm_clip_rect *cliprects;					\
+   int x_off, y_off;							\
+   GLuint p;						\
+   (void)p;						\
+   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
 
 #define LOCAL_DEPTH_VARS				\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
-   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
-   const GLuint bottom = dPriv->h - 1;			\
-   GLuint xo = dPriv->x;				\
-   GLuint yo = dPriv->y;				\
-   GLubyte *buf = (GLubyte *) drb->Base.Data;
+   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
+   struct radeon_renderbuffer *rrb = (void *) rb;	\
+   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
+   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
+   unsigned int num_cliprects;						\
+   struct drm_clip_rect *cliprects;					\
+   int x_off, y_off;							\
+  radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
 
 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
 
-#define Y_FLIP(Y) (bottom - (Y))
+#define Y_FLIP(_y) ((_y) * yScale + yBias)
 
 #define HW_LOCK()
 
 #define HW_UNLOCK()
 
+/* XXX FBO: this is identical to the macro in spantmp2.h except we get
+ * the cliprect info from the context, not the driDrawable.
+ * Move this into spantmp2.h someday.
+ */
+#define HW_CLIPLOOP()							\
+   do {									\
+      int _nc = num_cliprects;						\
+      while ( _nc-- ) {							\
+	 int minx = cliprects[_nc].x1 - x_off;				\
+	 int miny = cliprects[_nc].y1 - y_off;				\
+	 int maxx = cliprects[_nc].x2 - x_off;				\
+	 int maxy = cliprects[_nc].y2 - y_off;
+
 /* ================================================================
  * Color buffer
  */
@@ -94,7 +270,41 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define TAG(x)    radeon##x##_RGB565
 #define TAG2(x,y) radeon##x##_RGB565##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#include "spantmp2.h"
+
+/* 16 bit, ARGB1555 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
+
+#define TAG(x)    radeon##x##_ARGB1555
+#define TAG2(x,y) radeon##x##_ARGB1555##y
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#include "spantmp2.h"
+
+/* 16 bit, RGBA4 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
+
+#define TAG(x)    radeon##x##_ARGB4444
+#define TAG2(x,y) radeon##x##_ARGB4444##y
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#include "spantmp2.h"
+
+/* 32 bit, xRGB8888 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    radeon##x##_xRGB8888
+#define TAG2(x,y) radeon##x##_xRGB8888##y
+#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
+#define PUT_VALUE(_x, _y, d) { \
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
+   *_ptr = d;								\
+} while (0)
 #include "spantmp2.h"
 
 /* 32 bit, ARGB8888 color spanline and pixel functions
@@ -104,7 +314,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define TAG(x)    radeon##x##_ARGB8888
 #define TAG2(x,y) radeon##x##_ARGB8888##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
+#define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
+#define PUT_VALUE(_x, _y, d) { \
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
+   *_ptr = d;								\
+} while (0)
 #include "spantmp2.h"
 
 /* ================================================================
@@ -121,106 +335,127 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  * too...
  */
 
-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
-{
-	GLuint pitch = drb->pitch;
-	if (drb->depthHasSurface) {
-		return 4 * (x + y * pitch);
-	} else {
-		GLuint ba, address = 0;	/* a[0..1] = 0           */
-
-#ifdef COMPILE_R300
-		ba = (y / 8) * (pitch / 8) + (x / 8);
-#else
-		ba = (y / 16) * (pitch / 16) + (x / 16);
-#endif
-
-		address |= (x & 0x7) << 2;	/* a[2..4] = x[0..2]     */
-		address |= (y & 0x3) << 5;	/* a[5..6] = y[0..1]     */
-		address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;	/* a[7]    = x[4] ^ y[2] */
-		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
-
-		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
-		address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7;	/* a[11]   = x[3] ^ y[4] */
-		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
-
-		return address;
-	}
-}
-
-static INLINE GLuint
-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
-{
-	GLuint pitch = drb->pitch;
-	if (drb->depthHasSurface) {
-		return 2 * (x + y * pitch);
-	} else {
-		GLuint ba, address = 0;	/* a[0]    = 0           */
-
-		ba = (y / 16) * (pitch / 32) + (x / 32);
-
-		address |= (x & 0x7) << 1;	/* a[1..3] = x[0..2]     */
-		address |= (y & 0x7) << 4;	/* a[4..6] = y[0..2]     */
-		address |= (x & 0x8) << 4;	/* a[7]    = x[3]        */
-		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
-		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
-		address |= ((x & 0x10) ^ (y & 0x10)) << 7;	/* a[11]   = x[4] ^ y[4] */
-		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
-
-		return address;
-	}
-}
-
 /* 16-bit depth buffer functions
  */
 #define VALUE_TYPE GLushort
 
+#if defined(RADEON_COMMON_FOR_R200)
+#define WRITE_DEPTH( _x, _y, d )					\
+   *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
+#else
 #define WRITE_DEPTH( _x, _y, d )					\
-   *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
+   *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
+#endif
 
+#if defined(RADEON_COMMON_FOR_R200)
 #define READ_DEPTH( d, _x, _y )						\
-   d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
+   d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
+#else
+#define READ_DEPTH( d, _x, _y )						\
+   d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
+#endif
 
 #define TAG(x) radeon##x##_z16
 #include "depthtmp.h"
 
-/* 24 bit depth, 8 bit stencil depthbuffer functions
+/* 24 bit depth
  *
  * Careful: It looks like the R300 uses ZZZS byte order while the R200
  * uses SZZZ for 24 bit depth, 8 bit stencil mode.
  */
 #define VALUE_TYPE GLuint
 
-#ifdef COMPILE_R300
+#if defined(COMPILE_R300)
 #define WRITE_DEPTH( _x, _y, d )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-   GLuint tmp = *(GLuint *)(buf + offset);				\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
+   GLuint tmp = *_ptr;				\
    tmp &= 0x000000ff;							\
    tmp |= ((d << 8) & 0xffffff00);					\
-   *(GLuint *)(buf + offset) = tmp;					\
+   *_ptr = tmp;					\
+} while (0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
+   GLuint tmp = *_ptr;				\
+   tmp &= 0xff000000;							\
+   tmp |= ((d) & 0x00ffffff);						\
+   *_ptr = tmp;					\
 } while (0)
 #else
 #define WRITE_DEPTH( _x, _y, d )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-   GLuint tmp = *(GLuint *)(buf + offset);				\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );	\
+   GLuint tmp = *_ptr;							\
    tmp &= 0xff000000;							\
    tmp |= ((d) & 0x00ffffff);						\
-   *(GLuint *)(buf + offset) = tmp;					\
+   *_ptr = tmp;					\
 } while (0)
 #endif
 
-#ifdef COMPILE_R300
+#if defined(COMPILE_R300)
 #define READ_DEPTH( d, _x, _y )						\
-  do { \
-    d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,		\
-					 _y + yo )) & 0xffffff00) >> 8; \
+  do {									\
+    d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
+  }while(0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define READ_DEPTH( d, _x, _y )						\
+  do {									\
+    d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
   }while(0)
 #else
+#define READ_DEPTH( d, _x, _y )	\
+  d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off,	_y + y_off)) & 0x00ffffff;
+#endif
+
+#define TAG(x) radeon##x##_z24
+#include "depthtmp.h"
+
+/* 24 bit depth, 8 bit stencil depthbuffer functions
+ * EXT_depth_stencil
+ *
+ * Careful: It looks like the R300 uses ZZZS byte order while the R200
+ * uses SZZZ for 24 bit depth, 8 bit stencil mode.
+ */
+#define VALUE_TYPE GLuint
+
+#if defined(COMPILE_R300)
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
+   *_ptr = d;								\
+} while (0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
+   GLuint tmp = z24s8_to_s8z24(d);					\
+   *_ptr = tmp;								\
+} while (0)
+#else
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );	\
+   GLuint tmp = z24s8_to_s8z24(d);					\
+   *_ptr = tmp;					\
+} while (0)
+#endif
+
+#if defined(COMPILE_R300)
+#define READ_DEPTH( d, _x, _y )						\
+  do { \
+    d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)));	\
+  }while(0)
+#elif defined(RADEON_COMMON_FOR_R200)
 #define READ_DEPTH( d, _x, _y )						\
-   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,			\
-					 _y + yo )) & 0x00ffffff;
+  do { \
+    d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)));	\
+  }while(0)
+#else
+#define READ_DEPTH( d, _x, _y )	do {					\
+    d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off,	_y + y_off ))); \
+  } while (0)
 #endif
 
 #define TAG(x) radeon##x##_z24_s8
@@ -235,35 +470,51 @@ do {									\
 #ifdef COMPILE_R300
 #define WRITE_STENCIL( _x, _y, d )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-   GLuint tmp = *(GLuint *)(buf + offset);				\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);		\
+   GLuint tmp = *_ptr;				\
    tmp &= 0xffffff00;							\
    tmp |= (d) & 0xff;							\
-   *(GLuint *)(buf + offset) = tmp;					\
+   *_ptr = tmp;					\
+} while (0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define WRITE_STENCIL( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off);		\
+   GLuint tmp = *_ptr;				\
+   tmp &= 0x00ffffff;							\
+   tmp |= (((d) & 0xff) << 24);						\
+   *_ptr = tmp;					\
 } while (0)
 #else
 #define WRITE_STENCIL( _x, _y, d )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-   GLuint tmp = *(GLuint *)(buf + offset);				\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);		\
+   GLuint tmp = *_ptr;				\
    tmp &= 0x00ffffff;							\
    tmp |= (((d) & 0xff) << 24);						\
-   *(GLuint *)(buf + offset) = tmp;					\
+   *_ptr = tmp;					\
 } while (0)
 #endif
 
 #ifdef COMPILE_R300
 #define READ_STENCIL( d, _x, _y )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-   GLuint tmp = *(GLuint *)(buf + offset);				\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
+   GLuint tmp = *_ptr;				\
    d = tmp & 0x000000ff;						\
 } while (0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define READ_STENCIL( d, _x, _y )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
+   GLuint tmp = *_ptr;				\
+   d = (tmp & 0xff000000) >> 24;					\
+} while (0)
 #else
 #define READ_STENCIL( d, _x, _y )					\
 do {									\
-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
-   GLuint tmp = *(GLuint *)(buf + offset);				\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
+   GLuint tmp = *_ptr;				\
    d = (tmp & 0xff000000) >> 24;					\
 } while (0)
 #endif
@@ -271,29 +522,110 @@ do {									\
 #define TAG(x) radeon##x##_z24_s8
 #include "stenciltmp.h"
 
-/* Move locking out to get reasonable span performance (10x better
- * than doing this in HW_LOCK above).  WaitForIdle() is the main
- * culprit.
- */
+
+static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
+{
+	struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
+	int r;
+
+	if (rrb == NULL || !rrb->bo)
+		return;
+
+	if (flag) {
+		if (rrb->bo->bom->funcs->bo_wait)
+			radeon_bo_wait(rrb->bo);
+		r = radeon_bo_map(rrb->bo, 1);
+		if (r) {
+			fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
+				__FUNCTION__, r);
+		}
+
+		radeonSetSpanFunctions(rrb);
+	} else {
+		radeon_bo_unmap(rrb->bo);
+		rb->GetRow = NULL;
+		rb->PutRow = NULL;
+	}
+}
+
+static void
+radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
+{
+	GLuint i, j;
+
+	/* color draw buffers */
+	for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
+		map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);
+
+	/* check for render to textures */
+	for (i = 0; i < BUFFER_COUNT; i++) {
+		struct gl_renderbuffer_attachment *att =
+			ctx->DrawBuffer->Attachment + i;
+		struct gl_texture_object *tex = att->Texture;
+		if (tex) {
+			/* Render to texture. Note that a mipmapped texture need not
+			 * be complete for render to texture, so we must restrict to
+			 * mapping only the attached image.
+			 */
+			radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
+			ASSERT(att->Renderbuffer);
+
+			if (map)
+				radeon_teximage_map(image, GL_TRUE);
+			else
+				radeon_teximage_unmap(image);
+		}
+	}
+
+	map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);
+
+	/* depth buffer (Note wrapper!) */
+	if (ctx->DrawBuffer->_DepthBuffer)
+		map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
+
+	if (ctx->DrawBuffer->_StencilBuffer)
+		map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
+}
 
 static void radeonSpanRenderStart(GLcontext * ctx)
 {
 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-#ifdef COMPILE_R300
-	r300ContextPtr r300 = (r300ContextPtr) rmesa;
-	R300_FIREVERTICES(r300);
-#else
-	RADEON_FIREVERTICES(rmesa);
-#endif
-	LOCK_HARDWARE(rmesa);
-	radeonWaitForIdleLocked(rmesa);
+	int i;
+
+	radeon_firevertices(rmesa);
+
+	/* The locking and wait for idle should really only be needed in classic mode.
+	 * In a future memory manager based implementation, this should become
+	 * unnecessary due to the fact that mapping our buffers, textures, etc.
+	 * should implicitly wait for any previous rendering commands that must
+	 * be waited on. */
+	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+		LOCK_HARDWARE(rmesa);
+		radeonWaitForIdleLocked(rmesa);
+	}
+
+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+		if (ctx->Texture.Unit[i]._ReallyEnabled)
+			ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
+	}
+
+	radeon_map_unmap_buffers(ctx, 1);
 }
 
 static void radeonSpanRenderFinish(GLcontext * ctx)
 {
 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	int i;
 	_swrast_flush(ctx);
-	UNLOCK_HARDWARE(rmesa);
+	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+		UNLOCK_HARDWARE(rmesa);
+	}
+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+		if (ctx->Texture.Unit[i]._ReallyEnabled)
+			ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
+	}
+
+	radeon_map_unmap_buffers(ctx, 0);
 }
 
 void radeonInitSpanFuncs(GLcontext * ctx)
@@ -307,20 +639,27 @@ void radeonInitSpanFuncs(GLcontext * ctx)
 /**
  * Plug in the Get/Put routines for the given driRenderbuffer.
  */
-void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
+static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
 {
-	if (drb->Base.InternalFormat == GL_RGBA) {
-		if (vis->redBits == 5 && vis->greenBits == 6
-		    && vis->blueBits == 5) {
-			radeonInitPointers_RGB565(&drb->Base);
-		} else {
-			radeonInitPointers_ARGB8888(&drb->Base);
-		}
-	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
-		radeonInitDepthPointers_z16(&drb->Base);
-	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
-		radeonInitDepthPointers_z24_s8(&drb->Base);
-	} else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
-		radeonInitStencilPointers_z24_s8(&drb->Base);
+	if (rrb->base._ActualFormat == GL_RGB5) {
+		radeonInitPointers_RGB565(&rrb->base);
+	} else if (rrb->base._ActualFormat == GL_RGB8) {
+		radeonInitPointers_xRGB8888(&rrb->base);
+	} else if (rrb->base._ActualFormat == GL_RGBA8) {
+		radeonInitPointers_ARGB8888(&rrb->base);
+	} else if (rrb->base._ActualFormat == GL_RGBA4) {
+		radeonInitPointers_ARGB4444(&rrb->base);
+	} else if (rrb->base._ActualFormat == GL_RGB5_A1) {
+		radeonInitPointers_ARGB1555(&rrb->base);
+	} else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
+		radeonInitDepthPointers_z16(&rrb->base);
+	} else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
+		radeonInitDepthPointers_z24(&rrb->base);
+	} else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
+		radeonInitDepthPointers_z24_s8(&rrb->base);
+	} else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
+		radeonInitStencilPointers_z24_s8(&rrb->base);
+	} else {
+		fprintf(stderr, "radeonSetSpanFunctions: bad actual format: 0x%04X\n", rrb->base._ActualFormat);
 	}
 }
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h
index 9abe0864b17..ea6a2e7fb4e 100644
--- a/src/mesa/drivers/dri/radeon/radeon_span.h
+++ b/src/mesa/drivers/dri/radeon/radeon_span.h
@@ -42,9 +42,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #ifndef __RADEON_SPAN_H__
 #define __RADEON_SPAN_H__
 
-#include "drirenderbuffer.h"
-
 extern void radeonInitSpanFuncs(GLcontext * ctx);
-extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis);
 
 #endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
index b6561001e76..4d0d35ee0cd 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/state.h"
 #include "main/context.h"
 #include "main/framebuffer.h"
+#include "main/simple_list.h"
 
 #include "vbo/vbo.h"
 #include "tnl/tnl.h"
@@ -47,6 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "swrast_setup/swrast_setup.h"
 
 #include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
 #include "radeon_ioctl.h"
 #include "radeon_state.h"
 #include "radeon_tcl.h"
@@ -62,7 +64,7 @@ static void radeonUpdateSpecular( GLcontext *ctx );
 
 static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
    GLubyte refByte;
 
@@ -106,7 +108,7 @@ static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
 static void radeonBlendEquationSeparate( GLcontext *ctx,
 					 GLenum modeRGB, GLenum modeA )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK;
    GLboolean fallback = GL_FALSE;
 
@@ -147,8 +149,8 @@ static void radeonBlendFuncSeparate( GLcontext *ctx,
 				     GLenum sfactorRGB, GLenum dfactorRGB,
 				     GLenum sfactorA, GLenum dfactorA )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & 
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] &
       ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK);
    GLboolean fallback = GL_FALSE;
 
@@ -257,7 +259,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx,
 
 static void radeonDepthFunc( GLcontext *ctx, GLenum func )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
    RADEON_STATECHANGE( rmesa, ctx );
    rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK;
@@ -293,7 +295,7 @@ static void radeonDepthFunc( GLcontext *ctx, GLenum func )
 
 static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    RADEON_STATECHANGE( rmesa, ctx );
 
    if ( ctx->Depth.Mask ) {
@@ -305,16 +307,16 @@ static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
 
 static void radeonClearDepth( GLcontext *ctx, GLclampd d )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLuint format = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &
 		    RADEON_DEPTH_FORMAT_MASK);
 
    switch ( format ) {
    case RADEON_DEPTH_FORMAT_16BIT_INT_Z:
-      rmesa->state.depth.clear = d * 0x0000ffff;
+      rmesa->radeon.state.depth.clear = d * 0x0000ffff;
       break;
    case RADEON_DEPTH_FORMAT_24BIT_INT_Z:
-      rmesa->state.depth.clear = d * 0x00ffffff;
+      rmesa->radeon.state.depth.clear = d * 0x00ffffff;
       break;
    }
 }
@@ -327,7 +329,7 @@ static void radeonClearDepth( GLcontext *ctx, GLclampd d )
 
 static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    union { int i; float f; } c, d;
    GLchan col[4];
 
@@ -391,7 +393,7 @@ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
 	 rmesa->hw.fog.cmd[FOG_D] = d.i;
       }
       break;
-   case GL_FOG_COLOR: 
+   case GL_FOG_COLOR:
       RADEON_STATECHANGE( rmesa, ctx );
       UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
       rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~RADEON_FOG_COLOR_MASK;
@@ -406,109 +408,13 @@ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
    }
 }
 
-
-/* =============================================================
- * Scissoring
- */
-
-
-static GLboolean intersect_rect( drm_clip_rect_t *out,
-				 drm_clip_rect_t *a,
-				 drm_clip_rect_t *b )
-{
-   *out = *a;
-   if ( b->x1 > out->x1 ) out->x1 = b->x1;
-   if ( b->y1 > out->y1 ) out->y1 = b->y1;
-   if ( b->x2 < out->x2 ) out->x2 = b->x2;
-   if ( b->y2 < out->y2 ) out->y2 = b->y2;
-   if ( out->x1 >= out->x2 ) return GL_FALSE;
-   if ( out->y1 >= out->y2 ) return GL_FALSE;
-   return GL_TRUE;
-}
-
-
-void radeonRecalcScissorRects( radeonContextPtr rmesa )
-{
-   drm_clip_rect_t *out;
-   int i;
-
-   /* Grow cliprect store?
-    */
-   if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
-      while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
-	 rmesa->state.scissor.numAllocedClipRects += 1;	/* zero case */
-	 rmesa->state.scissor.numAllocedClipRects *= 2;
-      }
-
-      if (rmesa->state.scissor.pClipRects)
-	 FREE(rmesa->state.scissor.pClipRects);
-
-      rmesa->state.scissor.pClipRects = 
-	 MALLOC( rmesa->state.scissor.numAllocedClipRects * 
-		 sizeof(drm_clip_rect_t) );
-
-      if ( rmesa->state.scissor.pClipRects == NULL ) {
-	 rmesa->state.scissor.numAllocedClipRects = 0;
-	 return;
-      }
-   }
-   
-   out = rmesa->state.scissor.pClipRects;
-   rmesa->state.scissor.numClipRects = 0;
-
-   for ( i = 0 ; i < rmesa->numClipRects ;  i++ ) {
-      if ( intersect_rect( out, 
-			   &rmesa->pClipRects[i], 
-			   &rmesa->state.scissor.rect ) ) {
-	 rmesa->state.scissor.numClipRects++;
-	 out++;
-      }
-   }
-}
-
-
-static void radeonUpdateScissor( GLcontext *ctx )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   if ( rmesa->dri.drawable ) {
-      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-
-      int x = ctx->Scissor.X;
-      int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
-      int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
-      int h = dPriv->h - ctx->Scissor.Y - 1;
-
-      rmesa->state.scissor.rect.x1 = x + dPriv->x;
-      rmesa->state.scissor.rect.y1 = y + dPriv->y;
-      rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
-      rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
-
-      radeonRecalcScissorRects( rmesa );
-   }
-}
-
-
-static void radeonScissor( GLcontext *ctx,
-			   GLint x, GLint y, GLsizei w, GLsizei h )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   if ( ctx->Scissor.Enabled ) {
-      RADEON_FIREVERTICES( rmesa );	/* don't pipeline cliprect changes */
-      radeonUpdateScissor( ctx );
-   }
-
-}
-
-
 /* =============================================================
  * Culling
  */
 
 static void radeonCullFace( GLcontext *ctx, GLenum unused )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
    GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL];
 
@@ -545,7 +451,7 @@ static void radeonCullFace( GLcontext *ctx, GLenum unused )
 
 static void radeonFrontFace( GLcontext *ctx, GLenum mode )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
    RADEON_STATECHANGE( rmesa, set );
    rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK;
@@ -553,6 +459,10 @@ static void radeonFrontFace( GLcontext *ctx, GLenum mode )
    RADEON_STATECHANGE( rmesa, tcl );
    rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_CULL_FRONT_IS_CCW;
 
+   /* Winding is inverted when rendering to FBO */
+   if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+      mode = (mode == GL_CW) ? GL_CCW : GL_CW;
+
    switch ( mode ) {
    case GL_CW:
       rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_FFACE_CULL_CW;
@@ -570,7 +480,7 @@ static void radeonFrontFace( GLcontext *ctx, GLenum mode )
  */
 static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
    RADEON_STATECHANGE( rmesa, lin );
    RADEON_STATECHANGE( rmesa, set );
@@ -587,10 +497,10 @@ static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
 
 static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
    RADEON_STATECHANGE( rmesa, lin );
-   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = 
+   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] =
       ((((GLuint)factor & 0xff) << 16) | ((GLuint)pattern));
 }
 
@@ -602,12 +512,19 @@ static void radeonColorMask( GLcontext *ctx,
 			     GLboolean r, GLboolean g,
 			     GLboolean b, GLboolean a )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint mask = radeonPackColor( rmesa->radeonScreen->cpp,
-				  ctx->Color.ColorMask[RCOMP],
-				  ctx->Color.ColorMask[GCOMP],
-				  ctx->Color.ColorMask[BCOMP],
-				  ctx->Color.ColorMask[ACOMP] );
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   struct radeon_renderbuffer *rrb;
+   GLuint mask;
+
+   rrb = radeon_get_colorbuffer(&rmesa->radeon);
+   if (!rrb)
+     return;
+
+   mask = radeonPackColor( rrb->cpp,
+			   ctx->Color.ColorMask[RCOMP],
+			   ctx->Color.ColorMask[GCOMP],
+			   ctx->Color.ColorMask[BCOMP],
+			   ctx->Color.ColorMask[ACOMP] );
 
    if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != mask ) {
       RADEON_STATECHANGE( rmesa, msk );
@@ -623,8 +540,9 @@ static void radeonColorMask( GLcontext *ctx,
 static void radeonPolygonOffset( GLcontext *ctx,
 				 GLfloat factor, GLfloat units )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   float_ui32_type constant =  { units * rmesa->state.depth.scale };
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   float_ui32_type constant =  { units * depthScale };
    float_ui32_type factoru = { factor };
 
    RADEON_STATECHANGE( rmesa, zbs );
@@ -632,41 +550,16 @@ static void radeonPolygonOffset( GLcontext *ctx,
    rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32;
 }
 
-static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint i;
-   drm_radeon_stipple_t stipple;
-
-   /* Must flip pattern upside down.
-    */
-   for ( i = 0 ; i < 32 ; i++ ) {
-      rmesa->state.stipple.mask[31 - i] = ((GLuint *) mask)[i];
-   }
-
-   /* TODO: push this into cmd mechanism
-    */
-   RADEON_FIREVERTICES( rmesa );
-   LOCK_HARDWARE( rmesa );
-
-   /* FIXME: Use window x,y offsets into stipple RAM.
-    */
-   stipple.mask = rmesa->state.stipple.mask;
-   drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, 
-                    &stipple, sizeof(drm_radeon_stipple_t) );
-   UNLOCK_HARDWARE( rmesa );
-}
-
 static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0;
 
    /* Can't generally do unfilled via tcl, but some good special
-    * cases work. 
+    * cases work.
     */
    TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, flag);
-   if (rmesa->TclFallback) {
+   if (rmesa->radeon.TclFallback) {
       radeonChooseRenderState( ctx );
       radeonChooseVertexState( ctx );
    }
@@ -686,7 +579,7 @@ static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
  */
 static void radeonUpdateSpecular( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    uint32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
    GLuint flag = 0;
 
@@ -711,7 +604,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
       rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
       rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
       p |=  RADEON_SPECULAR_ENABLE;
-      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= 
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &=
 	 ~RADEON_DIFFUSE_SPECULAR_COMBINE;
    }
    else if (ctx->Light.Enabled) {
@@ -741,7 +634,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
 	    RADEON_TCL_COMPUTE_SPECULAR) != 0;
       }
    }
- 
+
    TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_FOGCOORDSPEC, flag);
 
    if (NEED_SECONDARY_COLOR(ctx)) {
@@ -757,7 +650,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
 
    /* Update vertex/render formats
     */
-   if (rmesa->TclFallback) { 
+   if (rmesa->radeon.TclFallback) {
       radeonChooseRenderState( ctx );
       radeonChooseVertexState( ctx );
    }
@@ -769,12 +662,12 @@ static void radeonUpdateSpecular( GLcontext *ctx )
  */
 
 
-/* Update on colormaterial, material emmissive/ambient, 
+/* Update on colormaterial, material emmissive/ambient,
  * lightmodel.globalambient
  */
 static void update_global_ambient( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    float *fcmd = (float *)RADEON_DB_STATE( glt );
 
    /* Need to do more if both emmissive & ambient are PREMULT:
@@ -782,23 +675,23 @@ static void update_global_ambient( GLcontext *ctx )
     */
    if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &
        ((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
-	(3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0) 
+	(3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0)
    {
-      COPY_3V( &fcmd[GLT_RED], 
+      COPY_3V( &fcmd[GLT_RED],
 	       ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]);
       ACC_SCALE_3V( &fcmd[GLT_RED],
 		   ctx->Light.Model.Ambient,
 		   ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]);
-   } 
+   }
    else
    {
       COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient );
    }
-   
+
    RADEON_DB_STATECHANGE(rmesa, &rmesa->hw.glt);
 }
 
-/* Update on change to 
+/* Update on change to
  *    - light[p].colors
  *    - light[p].enabled
  */
@@ -809,13 +702,13 @@ static void update_light_colors( GLcontext *ctx, GLuint p )
 /*     fprintf(stderr, "%s\n", __FUNCTION__); */
 
    if (l->Enabled) {
-      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      r100ContextPtr rmesa = R100_CONTEXT(ctx);
       float *fcmd = (float *)RADEON_DB_STATE( lit[p] );
 
-      COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );	 
+      COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );
       COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse );
       COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular );
-      
+
       RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
    }
 }
@@ -829,7 +722,7 @@ static void check_twoside_fallback( GLcontext *ctx )
 
    if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
       if (ctx->Light.ColorMaterialEnabled &&
-	  (ctx->Light.ColorMaterialBitmask & BACK_MATERIAL_BITS) != 
+	  (ctx->Light.ColorMaterialBitmask & BACK_MATERIAL_BITS) !=
 	  ((ctx->Light.ColorMaterialBitmask & FRONT_MATERIAL_BITS)<<1))
 	 fallback = GL_TRUE;
       else {
@@ -837,7 +730,7 @@ static void check_twoside_fallback( GLcontext *ctx )
 	    if (memcmp( ctx->Light.Material.Attrib[i],
 			ctx->Light.Material.Attrib[i+1],
 			sizeof(GLfloat)*4) != 0) {
-	       fallback = GL_TRUE;  
+	       fallback = GL_TRUE;
 	       break;
 	    }
       }
@@ -849,14 +742,14 @@ static void check_twoside_fallback( GLcontext *ctx )
 
 static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
 {
-      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+      r100ContextPtr rmesa = R100_CONTEXT(ctx);
       GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
 
       light_model_ctl1 &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
 			   (3 << RADEON_AMBIENT_SOURCE_SHIFT) |
 			   (3 << RADEON_DIFFUSE_SOURCE_SHIFT) |
-			   (3 << RADEON_SPECULAR_SOURCE_SHIFT)); 
-   
+			   (3 << RADEON_SPECULAR_SOURCE_SHIFT));
+
    if (ctx->Light.ColorMaterialEnabled) {
       GLuint mask = ctx->Light.ColorMaterialBitmask;
 
@@ -877,7 +770,7 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
 	 light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT <<
 			     RADEON_AMBIENT_SOURCE_SHIFT);
       }
-	 
+
       if (mask & MAT_BIT_FRONT_DIFFUSE) {
 	 light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
 			     RADEON_DIFFUSE_SOURCE_SHIFT);
@@ -886,7 +779,7 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
 	 light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT <<
 			     RADEON_DIFFUSE_SOURCE_SHIFT);
       }
-   
+
       if (mask & MAT_BIT_FRONT_SPECULAR) {
 	 light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
 			     RADEON_SPECULAR_SOURCE_SHIFT);
@@ -904,27 +797,27 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
 		   (RADEON_LM_SOURCE_STATE_MULT << RADEON_DIFFUSE_SOURCE_SHIFT) |
 		   (RADEON_LM_SOURCE_STATE_MULT << RADEON_SPECULAR_SOURCE_SHIFT);
    }
-   
+
       if (light_model_ctl1 != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) {
 	 RADEON_STATECHANGE( rmesa, tcl );
-	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl1;      
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl1;
    }
 }
 
 void radeonUpdateMaterial( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
    GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl );
    GLuint mask = ~0;
-   
+
    if (ctx->Light.ColorMaterialEnabled)
       mask &= ~ctx->Light.ColorMaterialBitmask;
 
-   if (RADEON_DEBUG & DEBUG_STATE)
+   if (RADEON_DEBUG & RADEON_STATE)
       fprintf(stderr, "%s\n", __FUNCTION__);
 
-      
+
    if (mask & MAT_BIT_FRONT_EMISSION) {
       fcmd[MTL_EMMISSIVE_RED]   = mat[MAT_ATTRIB_FRONT_EMISSION][0];
       fcmd[MTL_EMMISSIVE_GREEN] = mat[MAT_ATTRIB_FRONT_EMISSION][1];
@@ -974,11 +867,11 @@ void radeonUpdateMaterial( GLcontext *ctx )
  *
  * which are calculated in light.c and are correct for the current
  * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW
- * and _MESA_NEW_NEED_EYE_COORDS.  
+ * and _MESA_NEW_NEED_EYE_COORDS.
  */
 static void update_light( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
    /* Have to check these, or have an automatic shortcircuit mechanism
     * to remove noop statechanges. (Or just do a better job on the
@@ -991,12 +884,12 @@ static void update_light( GLcontext *ctx )
 	 tmp &= ~RADEON_LIGHT_IN_MODELSPACE;
       else
 	 tmp |= RADEON_LIGHT_IN_MODELSPACE;
-      
+
 
       /* Leave this test disabled: (unexplained q3 lockup) (even with
          new packets)
       */
-      if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) 
+      if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL])
       {
 	 RADEON_STATECHANGE( rmesa, tcl );
 	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = tmp;
@@ -1020,10 +913,10 @@ static void update_light( GLcontext *ctx )
 	 if (ctx->Light.Light[p].Enabled) {
 	    struct gl_light *l = &ctx->Light.Light[p];
 	    GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( lit[p] );
-	    
+
 	    if (l->EyePosition[3] == 0.0) {
-	       COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); 
-	       COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); 
+	       COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm );
+	       COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm );
 	       fcmd[LIT_POSITION_W] = 0;
 	       fcmd[LIT_DIRECTION_W] = 0;
 	    } else {
@@ -1043,30 +936,30 @@ static void update_light( GLcontext *ctx )
 static void radeonLightfv( GLcontext *ctx, GLenum light,
 			   GLenum pname, const GLfloat *params )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLint p = light - GL_LIGHT0;
    struct gl_light *l = &ctx->Light.Light[p];
    GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
-   
+
 
    switch (pname) {
-   case GL_AMBIENT:		
+   case GL_AMBIENT:
    case GL_DIFFUSE:
    case GL_SPECULAR:
       update_light_colors( ctx, p );
       break;
 
-   case GL_SPOT_DIRECTION: 
-      /* picked up in update_light */	
+   case GL_SPOT_DIRECTION:
+      /* picked up in update_light */
       break;
 
    case GL_POSITION: {
-      /* positions picked up in update_light, but can do flag here */	
+      /* positions picked up in update_light, but can do flag here */
       GLuint flag;
       GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
 
       /* FIXME: Set RANGE_ATTEN only when needed */
-      if (p&1) 
+      if (p&1)
 	 flag = RADEON_LIGHT_1_IS_LOCAL;
       else
 	 flag = RADEON_LIGHT_0_IS_LOCAL;
@@ -1158,16 +1051,16 @@ static void radeonLightfv( GLcontext *ctx, GLenum light,
    }
 }
 
-		  
+
 
 
 static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
 				const GLfloat *param )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
    switch (pname) {
-      case GL_LIGHT_MODEL_AMBIENT: 
+      case GL_LIGHT_MODEL_AMBIENT:
 	 update_global_ambient( ctx );
 	 break;
 
@@ -1188,7 +1081,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
 
 	 check_twoside_fallback( ctx );
 
-	 if (rmesa->TclFallback) {
+	 if (rmesa->radeon.TclFallback) {
 	    radeonChooseRenderState( ctx );
 	    radeonChooseVertexState( ctx );
 	 }
@@ -1205,7 +1098,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
 
 static void radeonShadeModel( GLcontext *ctx, GLenum mode )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
 
    s &= ~(RADEON_DIFFUSE_SHADE_MASK |
@@ -1244,7 +1137,7 @@ static void radeonShadeModel( GLcontext *ctx, GLenum mode )
 static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
 {
    GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
 
    RADEON_STATECHANGE( rmesa, ucp[p] );
@@ -1256,7 +1149,7 @@ static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
 
 static void radeonUpdateClipPlanes( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLuint p;
 
    for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
@@ -1281,7 +1174,7 @@ static void
 radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
                            GLint ref, GLuint mask )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << RADEON_STENCIL_REF_SHIFT) |
 		     ((ctx->Stencil.ValueMask[0] & 0xff) << RADEON_STENCIL_MASK_SHIFT));
 
@@ -1325,7 +1218,7 @@ radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
 static void
 radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
    RADEON_STATECHANGE( rmesa, msk );
    rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~RADEON_STENCIL_WRITE_MASK;
@@ -1336,20 +1229,20 @@ radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
 static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
                                      GLenum zfail, GLenum zpass )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
    /* radeon 7200 have stencil bug, DEC and INC_WRAP will actually both do DEC_WRAP,
       and DEC_WRAP (and INVERT) will do INVERT. No way to get correct INC_WRAP and DEC,
       but DEC_WRAP can be fixed by using DEC and INC_WRAP at least use INC. */
-   
+
    GLuint tempRADEON_STENCIL_FAIL_DEC_WRAP;
    GLuint tempRADEON_STENCIL_FAIL_INC_WRAP;
    GLuint tempRADEON_STENCIL_ZFAIL_DEC_WRAP;
    GLuint tempRADEON_STENCIL_ZFAIL_INC_WRAP;
    GLuint tempRADEON_STENCIL_ZPASS_DEC_WRAP;
    GLuint tempRADEON_STENCIL_ZPASS_INC_WRAP;
-   
-   if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
+
+   if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
       tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC;
       tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC;
       tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC;
@@ -1365,7 +1258,7 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
       tempRADEON_STENCIL_ZPASS_DEC_WRAP = RADEON_STENCIL_ZPASS_DEC_WRAP;
       tempRADEON_STENCIL_ZPASS_INC_WRAP = RADEON_STENCIL_ZPASS_INC_WRAP;
    }
-   
+
    RADEON_STATECHANGE( rmesa, ctx );
    rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~(RADEON_STENCIL_FAIL_MASK |
 					       RADEON_STENCIL_ZFAIL_MASK |
@@ -1455,9 +1348,9 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
 
 static void radeonClearStencil( GLcontext *ctx, GLint s )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
-   rmesa->state.stencil.clear = 
+   rmesa->radeon.state.stencil.clear =
       ((GLuint) (ctx->Stencil.Clear & 0xff) |
        (0xff << RADEON_STENCIL_MASK_SHIFT) |
        ((ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT));
@@ -1481,20 +1374,30 @@ static void radeonClearStencil( GLcontext *ctx, GLint s )
  */
 void radeonUpdateWindow( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
-   GLfloat xoffset = (GLfloat)dPriv->x;
-   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
+   GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
+   GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
    const GLfloat *v = ctx->Viewport._WindowMap.m;
+   const GLboolean render_to_fbo = (ctx->DrawBuffer ? (ctx->DrawBuffer->Name != 0) : 0);
+   const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   GLfloat y_scale, y_bias;
+
+   if (render_to_fbo) {
+      y_scale = 1.0;
+      y_bias = 0;
+   } else {
+      y_scale = -1.0;
+      y_bias = yoffset;
+   }
 
    float_ui32_type sx = { v[MAT_SX] };
    float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
-   float_ui32_type sy = { - v[MAT_SY] };
-   float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y };
-   float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale };
-   float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale };
+   float_ui32_type sy = { v[MAT_SY] * y_scale };
+   float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y };
+   float_ui32_type sz = { v[MAT_SZ] * depthScale };
+   float_ui32_type tz = { v[MAT_TZ] * depthScale };
 
-   RADEON_FIREVERTICES( rmesa );
    RADEON_STATECHANGE( rmesa, vpt );
 
    rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = sx.ui32;
@@ -1514,6 +1417,8 @@ static void radeonViewport( GLcontext *ctx, GLint x, GLint y,
     * values, or keep the originals hanging around.
     */
    radeonUpdateWindow( ctx );
+
+   radeon_viewport(ctx, x, y, width, height);
 }
 
 static void radeonDepthRange( GLcontext *ctx, GLclampd nearval,
@@ -1524,8 +1429,8 @@ static void radeonDepthRange( GLcontext *ctx, GLclampd nearval,
 
 void radeonUpdateViewportOffset( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon);
    GLfloat xoffset = (GLfloat)dPriv->x;
    GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
    const GLfloat *v = ctx->Viewport._WindowMap.m;
@@ -1555,8 +1460,8 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
                 RADEON_STIPPLE_Y_OFFSET_MASK);
 
          /* add magic offsets, then invert */
-         stx = 31 - ((rmesa->dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK);
-         sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
+         stx = 31 - ((dPriv->x - 1) & RADEON_STIPPLE_COORD_MASK);
+         sty = 31 - ((dPriv->y + dPriv->h - 1)
                      & RADEON_STIPPLE_COORD_MASK);
 
          m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) |
@@ -1580,20 +1485,26 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
 
 static void radeonClearColor( GLcontext *ctx, const GLfloat color[4] )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLubyte c[4];
+   struct radeon_renderbuffer *rrb;
+
+   rrb = radeon_get_colorbuffer(&rmesa->radeon);
+   if (!rrb)
+     return;
+     
    CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
    CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
    CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
    CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
-   rmesa->state.color.clear = radeonPackColor( rmesa->radeonScreen->cpp,
+   rmesa->radeon.state.color.clear = radeonPackColor( rrb->cpp,
 					       c[0], c[1], c[2], c[3] );
 }
 
 
 static void radeonRenderMode( GLcontext *ctx, GLenum mode )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    FALLBACK( rmesa, RADEON_FALLBACK_RENDER_MODE, (mode != GL_RENDER) );
 }
 
@@ -1619,7 +1530,7 @@ static GLuint radeon_rop_tab[] = {
 
 static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLuint rop = (GLuint)opcode - GL_CLEAR;
 
    ASSERT( rop < 16 );
@@ -1628,108 +1539,16 @@ static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
    rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = radeon_rop_tab[rop];
 }
 
-
-/**
- * Set up the cliprects for either front or back-buffer drawing.
- */
-void radeonSetCliprects( radeonContextPtr rmesa )
-{
-   __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
-   __DRIdrawablePrivate *const readable = rmesa->dri.readable;
-   GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
-   GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
-
-   if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
-      /* Can't ignore 2d windows if we are page flipping.
-       */
-      if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
-	 rmesa->numClipRects = drawable->numClipRects;
-	 rmesa->pClipRects = drawable->pClipRects;
-      }
-      else {
-	 rmesa->numClipRects = drawable->numBackClipRects;
-	 rmesa->pClipRects = drawable->pBackClipRects;
-      }
-   }
-   else {
-      /* front buffer (or none, or multiple buffers */
-      rmesa->numClipRects = drawable->numClipRects;
-      rmesa->pClipRects = drawable->pClipRects;
-   }
-
-   if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) {
-      _mesa_resize_framebuffer(rmesa->glCtx, draw_fb,
-			       drawable->w, drawable->h);
-      draw_fb->Initialized = GL_TRUE;
-   }
-
-   if (drawable != readable) {
-      if ((read_fb->Width != readable->w) || (read_fb->Height != readable->h)) {
-	 _mesa_resize_framebuffer(rmesa->glCtx, read_fb,
-				  readable->w, readable->h);
-	 read_fb->Initialized = GL_TRUE;
-      }
-   }
-
-   if (rmesa->state.scissor.enabled)
-      radeonRecalcScissorRects( rmesa );
-
-   rmesa->lastStamp = drawable->lastStamp;
-}
-
-
-/**
- * Called via glDrawBuffer.
- */
-static void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   if (RADEON_DEBUG & DEBUG_DRI)
-      fprintf(stderr, "%s %s\n", __FUNCTION__,
-	      _mesa_lookup_enum_by_nr( mode ));
-
-   RADEON_FIREVERTICES(rmesa);	/* don't pipeline cliprect changes */
-
-   if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
-      /* 0 (GL_NONE) buffers or multiple color drawing buffers */
-      FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE );
-      return;
-   }
-
-   switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) {
-   case BUFFER_FRONT_LEFT:
-   case BUFFER_BACK_LEFT:
-      FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      break;
-   default:
-      FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE );
-      return;
-   }
-
-   radeonSetCliprects( rmesa );
-
-   /* We'll set the drawing engine's offset/pitch parameters later
-    * when we update other state.
-    */
-}
-
-static void radeonReadBuffer( GLcontext *ctx, GLenum mode )
-{
-   /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
-}
-
-
 /* =============================================================
  * State enable/disable
  */
 
 static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLuint p, flag;
 
-   if ( RADEON_DEBUG & DEBUG_STATE )
+   if ( RADEON_DEBUG & RADEON_STATE )
       fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__,
 	       _mesa_lookup_enum_by_nr( cap ),
 	       state ? "GL_TRUE" : "GL_FALSE" );
@@ -1787,7 +1606,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
    case GL_CLIP_PLANE2:
    case GL_CLIP_PLANE3:
    case GL_CLIP_PLANE4:
-   case GL_CLIP_PLANE5: 
+   case GL_CLIP_PLANE5:
       p = cap-GL_CLIP_PLANE0;
       RADEON_STATECHANGE( rmesa, tcl );
       if (state) {
@@ -1821,10 +1640,10 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
       RADEON_STATECHANGE(rmesa, ctx );
       if ( state ) {
 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_DITHER_ENABLE;
-	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
       } else {
 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE;
-	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->state.color.roundEnable;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->radeon.state.color.roundEnable;
       }
       break;
 
@@ -1852,13 +1671,13 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
    case GL_LIGHT7:
       RADEON_STATECHANGE(rmesa, tcl);
       p = cap - GL_LIGHT0;
-      if (p&1) 
+      if (p&1)
 	 flag = (RADEON_LIGHT_1_ENABLE |
-		 RADEON_LIGHT_1_ENABLE_AMBIENT | 
+		 RADEON_LIGHT_1_ENABLE_AMBIENT |
 		 RADEON_LIGHT_1_ENABLE_SPECULAR);
       else
 	 flag = (RADEON_LIGHT_0_ENABLE |
-		 RADEON_LIGHT_0_ENABLE_AMBIENT | 
+		 RADEON_LIGHT_0_ENABLE_AMBIENT |
 		 RADEON_LIGHT_0_ENABLE_SPECULAR);
 
       if (state)
@@ -1866,7 +1685,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
       else
 	 rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag;
 
-      /* 
+      /*
        */
       update_light_colors( ctx, p );
       break;
@@ -1904,7 +1723,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
       }
       break;
-      
+
    case GL_NORMALIZE:
       RADEON_STATECHANGE( rmesa, tcl );
       if ( state ) {
@@ -1971,21 +1790,30 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
    }
 
    case GL_SCISSOR_TEST:
-      RADEON_FIREVERTICES( rmesa );
-      rmesa->state.scissor.enabled = state;
+      radeon_firevertices(&rmesa->radeon);
+      rmesa->radeon.state.scissor.enabled = state;
       radeonUpdateScissor( ctx );
       break;
 
    case GL_STENCIL_TEST:
-      if ( rmesa->state.stencil.hwBuffer ) {
-	 RADEON_STATECHANGE( rmesa, ctx );
-	 if ( state ) {
-	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_STENCIL_ENABLE;
+      {
+	 GLboolean hw_stencil = GL_FALSE;
+	 if (ctx->DrawBuffer) {
+	    struct radeon_renderbuffer *rrbStencil
+	       = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+	    hw_stencil = (rrbStencil && rrbStencil->bo);
+	 }
+
+	 if (hw_stencil) {
+	    RADEON_STATECHANGE( rmesa, ctx );
+	    if ( state ) {
+	       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_STENCIL_ENABLE;
+	    } else {
+	       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_STENCIL_ENABLE;
+	    }
 	 } else {
-	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_STENCIL_ENABLE;
+	    FALLBACK( rmesa, RADEON_FALLBACK_STENCIL, state );
 	 }
-      } else {
-	 FALLBACK( rmesa, RADEON_FALLBACK_STENCIL, state );
       }
       break;
 
@@ -1995,7 +1823,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
    case GL_TEXTURE_GEN_T:
       /* Picked up in radeonUpdateTextureState.
        */
-      rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; 
+      rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE;
       break;
 
    case GL_COLOR_SUM_EXT:
@@ -2010,11 +1838,11 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
 
 static void radeonLightingSpaceChange( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLboolean tmp;
    RADEON_STATECHANGE( rmesa, tcl );
 
-   if (RADEON_DEBUG & DEBUG_STATE)
+   if (RADEON_DEBUG & RADEON_STATE)
       fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
 	      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]);
 
@@ -2029,7 +1857,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx )
       rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS;
    }
 
-   if (RADEON_DEBUG & DEBUG_STATE) 
+   if (RADEON_DEBUG & RADEON_STATE)
       fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
 	      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]);
 }
@@ -2039,7 +1867,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx )
  */
 
 
-void radeonUploadTexMatrix( radeonContextPtr rmesa,
+void radeonUploadTexMatrix( r100ContextPtr rmesa,
 			    int unit, GLboolean swapcols )
 {
 /* Here's how this works: on r100, only 3 tex coords can be submitted, so the
@@ -2065,7 +1893,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa,
    int idx = TEXMAT_0 + unit;
    float *dest = ((float *)RADEON_DB_STATE( mat[idx] )) + MAT_ELT_0;
    int i;
-   struct gl_texture_unit tUnit = rmesa->glCtx->Texture.Unit[unit];
+   struct gl_texture_unit tUnit = rmesa->radeon.glCtx->Texture.Unit[unit];
    GLfloat *src = rmesa->tmpmat[unit].m;
 
    rmesa->TexMatColSwap &= ~(1 << unit);
@@ -2119,7 +1947,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa,
 }
 
 
-static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
+static void upload_matrix( r100ContextPtr rmesa, GLfloat *src, int idx )
 {
    float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
    int i;
@@ -2135,7 +1963,7 @@ static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
    RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
 }
 
-static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
+static void upload_matrix_t( r100ContextPtr rmesa, GLfloat *src, int idx )
 {
    float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
    memcpy(dest, src, 16*sizeof(float));
@@ -2145,7 +1973,7 @@ static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
 
 static void update_texturematrix( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
    GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL];
    GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL];
    int unit;
@@ -2209,64 +2037,74 @@ static void update_texturematrix( GLcontext *ctx )
    }
 }
 
-
-/**
- * Tell the card where to render (offset, pitch).
- * Effected by glDrawBuffer, etc
- */
-void
-radeonUpdateDrawBuffer(GLcontext *ctx)
+static GLboolean r100ValidateBuffers(GLcontext *ctx)
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   struct gl_framebuffer *fb = ctx->DrawBuffer;
-   driRenderbuffer *drb;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   struct radeon_renderbuffer *rrb;
+   int i, ret;
 
-   if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
-      /* draw to front */
-      drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-   }
-   else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
-      /* draw to back */
-      drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+   radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
+
+   rrb = radeon_get_colorbuffer(&rmesa->radeon);
+   /* color buffer */
+   if (rrb && rrb->bo) {
+     radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, rrb->bo,
+				       0, RADEON_GEM_DOMAIN_VRAM);
    }
-   else {
-      /* drawing to multiple buffers, or none */
-      return;
+
+   /* depth buffer */
+   rrb = radeon_get_depthbuffer(&rmesa->radeon);
+   /* color buffer */
+   if (rrb && rrb->bo) {
+     radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, rrb->bo,
+				       0, RADEON_GEM_DOMAIN_VRAM);
    }
 
-   assert(drb);
-   assert(drb->flippedPitch);
+   for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
+      radeonTexObj *t;
 
-   RADEON_STATECHANGE( rmesa, ctx );
+      if (!ctx->Texture.Unit[i]._ReallyEnabled)
+	 continue;
 
-   /* Note: we used the (possibly) page-flipped values */
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
-     = ((drb->flippedOffset + rmesa->radeonScreen->fbLocation)
-	& RADEON_COLOROFFSET_MASK);
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
-   if (rmesa->sarea->tiling_enabled) {
-      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
+      t = rmesa->state.texture.unit[i].texobj;
+      if (t->image_override && t->bo)
+	radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->bo,
+			   RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+      else if (t->mt->bo)
+	radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->mt->bo,
+			   RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
    }
-}
 
+   ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
+   if (ret)
+       return GL_FALSE;
+   return GL_TRUE;
+}
 
-void radeonValidateState( GLcontext *ctx )
+GLboolean radeonValidateState( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint new_state = rmesa->NewGLState;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint new_state = rmesa->radeon.NewGLState;
 
-   if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
-     radeonUpdateDrawBuffer(ctx);
+   if (new_state & _NEW_BUFFERS) {
+     _mesa_update_framebuffer(ctx);
+     /* this updates the DrawBuffer's Width/Height if it's a FBO */
+     _mesa_update_draw_buffer_bounds(ctx);
+     RADEON_STATECHANGE(rmesa, ctx);
    }
 
    if (new_state & _NEW_TEXTURE) {
       radeonUpdateTextureState( ctx );
-      new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
+      new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
    }
 
+   /* we need to do a space check here */
+   if (!r100ValidateBuffers(ctx))
+     return GL_FALSE;
+
    /* Need an event driven matrix update?
     */
-   if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) 
+   if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION))
       upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, MODEL_PROJ );
 
    /* Need these for lighting (shouldn't upload otherwise)
@@ -2290,12 +2128,14 @@ void radeonValidateState( GLcontext *ctx )
    /* emit all active clip planes if projection matrix changes.
     */
    if (new_state & (_NEW_PROJECTION)) {
-      if (ctx->Transform.ClipPlanesEnabled) 
+      if (ctx->Transform.ClipPlanesEnabled)
 	 radeonUpdateClipPlanes( ctx );
    }
 
 
-   rmesa->NewGLState = 0;
+   rmesa->radeon.NewGLState = 0;
+
+   return GL_TRUE;
 }
 
 
@@ -2306,7 +2146,7 @@ static void radeonInvalidateState( GLcontext *ctx, GLuint new_state )
    _vbo_InvalidateState( ctx, new_state );
    _tnl_InvalidateState( ctx, new_state );
    _ae_invalidate_state( ctx, new_state );
-   RADEON_CONTEXT(ctx)->NewGLState |= new_state;
+   R100_CONTEXT(ctx)->radeon.NewGLState |= new_state;
 }
 
 
@@ -2317,8 +2157,8 @@ static GLboolean check_material( GLcontext *ctx )
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    GLint i;
 
-   for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT; 
-	i < _TNL_ATTRIB_MAT_BACK_INDEXES; 
+   for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT;
+	i < _TNL_ATTRIB_MAT_BACK_INDEXES;
 	i++)
       if (tnl->vb.AttribPtr[i] &&
 	  tnl->vb.AttribPtr[i]->stride)
@@ -2326,20 +2166,21 @@ static GLboolean check_material( GLcontext *ctx )
 
    return GL_FALSE;
 }
-      
+
 
 static void radeonWrapRunPipeline( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLboolean has_material;
 
    if (0)
-      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
+      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
 
    /* Validate state:
     */
-   if (rmesa->NewGLState)
-      radeonValidateState( ctx );
+   if (rmesa->radeon.NewGLState)
+      if (!radeonValidateState( ctx ))
+	 FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE);
 
    has_material = (ctx->Light.Enabled && check_material( ctx ));
 
@@ -2348,7 +2189,7 @@ static void radeonWrapRunPipeline( GLcontext *ctx )
    }
 
    /* Run the pipeline.
-    */ 
+    */
    _tnl_run_pipeline( ctx );
 
    if (has_material) {
@@ -2356,12 +2197,28 @@ static void radeonWrapRunPipeline( GLcontext *ctx )
    }
 }
 
+static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
+{
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   GLint i;
+
+   radeon_firevertices(&r100->radeon);
+
+   RADEON_STATECHANGE(r100, stp);
+
+   /* Must flip pattern upside down.
+    */
+   for ( i = 31 ; i >= 0; i--) {
+     r100->hw.stp.cmd[3 + i] = ((GLuint *) mask)[i];
+   }
+}
+
 
 /* Initialize the driver's state functions.
  * Many of the ctx->Driver functions might have been initialized to
  * software defaults in the earlier _mesa_init_driver_functions() call.
  */
-void radeonInitStateFuncs( GLcontext *ctx )
+void radeonInitStateFuncs( GLcontext *ctx , GLboolean dri2 )
 {
    ctx->Driver.UpdateState		= radeonInvalidateState;
    ctx->Driver.LightingSpaceChange      = radeonLightingSpaceChange;
@@ -2394,7 +2251,10 @@ void radeonInitStateFuncs( GLcontext *ctx )
    ctx->Driver.LogicOpcode		= radeonLogicOpCode;
    ctx->Driver.PolygonMode		= radeonPolygonMode;
    ctx->Driver.PolygonOffset		= radeonPolygonOffset;
-   ctx->Driver.PolygonStipple		= radeonPolygonStipple;
+   if (dri2)
+      ctx->Driver.PolygonStipple		= radeonPolygonStipple;
+   else
+      ctx->Driver.PolygonStipple		= radeonPolygonStipplePreKMS;
    ctx->Driver.RenderMode		= radeonRenderMode;
    ctx->Driver.Scissor			= radeonScissor;
    ctx->Driver.ShadeModel		= radeonShadeModel;
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.h b/src/mesa/drivers/dri/radeon/radeon_state.h
index 2171879f759..c780cff0cfb 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state.h
+++ b/src/mesa/drivers/dri/radeon/radeon_state.h
@@ -39,30 +39,25 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "radeon_context.h"
 
-extern void radeonInitState( radeonContextPtr rmesa );
-extern void radeonInitStateFuncs( GLcontext *ctx );
+extern void radeonInitState( r100ContextPtr rmesa );
+extern void radeonInitStateFuncs( GLcontext *ctx , GLboolean dri2);
 
 extern void radeonUpdateMaterial( GLcontext *ctx );
 
-extern void radeonSetCliprects( radeonContextPtr rmesa );
-extern void radeonRecalcScissorRects( radeonContextPtr rmesa );
 extern void radeonUpdateViewportOffset( GLcontext *ctx );
 extern void radeonUpdateWindow( GLcontext *ctx );
 extern void radeonUpdateDrawBuffer( GLcontext *ctx );
-extern void radeonUploadTexMatrix( radeonContextPtr rmesa,
+extern void radeonUploadTexMatrix( r100ContextPtr rmesa,
 				   int unit, GLboolean swapcols );
 
-extern void radeonValidateState( GLcontext *ctx );
-
-extern void radeonPrintDirty( radeonContextPtr rmesa,
-			      const char *msg );
+extern GLboolean radeonValidateState( GLcontext *ctx );
 
 
 extern void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
 #define FALLBACK( rmesa, bit, mode ) do {				\
    if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n",		\
 		     __FUNCTION__, bit, mode );				\
-   radeonFallback( rmesa->glCtx, bit, mode );				\
+   radeonFallback( rmesa->radeon.glCtx, bit, mode );				\
 } while (0)
 
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c
index 57dc3800501..f3ad0dd17af 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state_init.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c
@@ -38,39 +38,141 @@
 #include "swrast_setup/swrast_setup.h"
 
 #include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
 #include "radeon_ioctl.h"
 #include "radeon_state.h"
 #include "radeon_tcl.h"
 #include "radeon_tex.h"
 #include "radeon_swtcl.h"
+#include "radeon_queryobj.h"
+
+#include "../r200/r200_reg.h"
 
 #include "xmlpool.h"
 
+/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
+ * 1.3 cmdbuffers allow all previous state to be updated as well as
+ * the tcl scalar and vector areas.
+ */
+static struct {
+	int start;
+	int len;
+	const char *name;
+} packet[RADEON_MAX_STATE_PACKETS] = {
+	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
+	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
+	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
+	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
+	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
+	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
+	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
+	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
+	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
+	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
+	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
+	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
+	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
+	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
+	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
+	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
+	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
+	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
+	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
+	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
+	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
+		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
+	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
+	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
+	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
+	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
+	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
+	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
+	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
+	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
+	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
+	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
+	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
+	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
+	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
+	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
+	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
+	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
+	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
+	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
+	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
+	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
+	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
+	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
+	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
+	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
+	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
+	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
+	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
+	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
+	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
+	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
+	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
+	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
+	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
+	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
+	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
+	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
+	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
+	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
+	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
+	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
+	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
+		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
+	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
+	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
+	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
+	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
+	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
+	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
+	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
+	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
+	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
+	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
+	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
+	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
+	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
+	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
+	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
+	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
+	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
+	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
+	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
+	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
+	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
+	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
+	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
+	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
+	{R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"},     /* 85 */
+	{R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
+	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
+	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
+	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
+	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
+	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
+	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
+	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
+	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
+};
+
 /* =============================================================
  * State initialization
  */
-
-void radeonPrintDirty( radeonContextPtr rmesa, const char *msg )
+static int cmdpkt( r100ContextPtr rmesa, int id ) 
 {
-   struct radeon_state_atom *l;
-
-   fprintf(stderr, msg);
-   fprintf(stderr, ": ");
+   drm_radeon_cmd_header_t h;
 
-   foreach(l, &rmesa->hw.atomlist) {
-      if (l->dirty || rmesa->hw.all_dirty)
-	 fprintf(stderr, "%s, ", l->name);
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+     return CP_PACKET0(packet[id].start, packet[id].len - 1);
+   } else {
+     h.i = 0;
+     h.packet.cmd_type = RADEON_CMD_PACKET;
+     h.packet.packet_id = id;
    }
-
-   fprintf(stderr, "\n");
-}
-
-static int cmdpkt( int id ) 
-{
-   drm_radeon_cmd_header_t h;
-   h.i = 0;
-   h.packet.cmd_type = RADEON_CMD_PACKET;
-   h.packet.packet_id = id;
    return h.i;
 }
 
@@ -96,131 +198,523 @@ static int cmdscl( int offset, int stride, int count )
    return h.i;
 }
 
-#define CHECK( NM, FLAG )			\
-static GLboolean check_##NM( GLcontext *ctx )	\
-{						\
-   return FLAG;					\
+#define CHECK( NM, FLAG, ADD )				\
+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom )	\
+{							\
+   return FLAG ? atom->cmd_size + (ADD) : 0;			\
 }
 
-#define TCL_CHECK( NM, FLAG )				\
-static GLboolean check_##NM( GLcontext *ctx )		\
+#define TCL_CHECK( NM, FLAG, ADD )				\
+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom )	\
 {							\
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);	\
-   return !rmesa->TclFallback && (FLAG);		\
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);	\
+   return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size + (ADD) : 0;	\
 }
 
 
-CHECK( always, GL_TRUE )
-CHECK( never, GL_FALSE )
-CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled )
-CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled )
+CHECK( always, GL_TRUE, 0 )
+CHECK( always_add2, GL_TRUE, 2 )
+CHECK( always_add4, GL_TRUE, 4 )
+CHECK( never, GL_FALSE, 0 )
+CHECK( tex0_mm, ctx->Texture.Unit[0]._ReallyEnabled, 3 )
+CHECK( tex1_mm, ctx->Texture.Unit[1]._ReallyEnabled, 3 )
 /* need this for the cubic_map on disabled unit 2 bug, maybe r100 only? */
-CHECK( tex2, ctx->Texture._EnabledUnits )
-CHECK( cube0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT))
-CHECK( cube1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT))
-CHECK( cube2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT))
-CHECK( fog, ctx->Fog.Enabled )
-TCL_CHECK( tcl, GL_TRUE )
-TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled )
-TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled )
-TCL_CHECK( tcl_tex2, ctx->Texture.Unit[2]._ReallyEnabled )
-TCL_CHECK( tcl_lighting, ctx->Light.Enabled )
-TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled )
-TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled )
-TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled )
-TCL_CHECK( tcl_lit2, ctx->Light.Enabled && ctx->Light.Light[2].Enabled )
-TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled )
-TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled )
-TCL_CHECK( tcl_lit5, ctx->Light.Enabled && ctx->Light.Light[5].Enabled )
-TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled )
-TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled )
-TCL_CHECK( tcl_ucp0, (ctx->Transform.ClipPlanesEnabled & 0x1) )
-TCL_CHECK( tcl_ucp1, (ctx->Transform.ClipPlanesEnabled & 0x2) )
-TCL_CHECK( tcl_ucp2, (ctx->Transform.ClipPlanesEnabled & 0x4) )
-TCL_CHECK( tcl_ucp3, (ctx->Transform.ClipPlanesEnabled & 0x8) )
-TCL_CHECK( tcl_ucp4, (ctx->Transform.ClipPlanesEnabled & 0x10) )
-TCL_CHECK( tcl_ucp5, (ctx->Transform.ClipPlanesEnabled & 0x20) )
-TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled ) 
-
-CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT))
-CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT))
-CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT))
+CHECK( tex2_mm, ctx->Texture._EnabledUnits, 3 )
+CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled, 2 )
+CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled, 2 )
+CHECK( tex2, ctx->Texture._EnabledUnits, 2 )
+CHECK( cube0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE )
+CHECK( cube1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE )
+CHECK( cube2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE )
+CHECK( cube0_mm, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE )
+CHECK( cube1_mm, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE )
+CHECK( cube2_mm, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE )
+CHECK( fog, ctx->Fog.Enabled, 0 )
+CHECK( fog_add4, ctx->Fog.Enabled, 4 )
+TCL_CHECK( tcl, GL_TRUE, 0 )
+TCL_CHECK( tcl_add4, GL_TRUE, 4 )
+TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled, 0 )
+TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled, 0 )
+TCL_CHECK( tcl_tex2, ctx->Texture.Unit[2]._ReallyEnabled, 0 )
+TCL_CHECK( tcl_tex0_add4, ctx->Texture.Unit[0]._ReallyEnabled, 4 )
+TCL_CHECK( tcl_tex1_add4, ctx->Texture.Unit[1]._ReallyEnabled, 4 )
+TCL_CHECK( tcl_tex2_add4, ctx->Texture.Unit[2]._ReallyEnabled, 4 )
+TCL_CHECK( tcl_lighting, ctx->Light.Enabled, 0 )
+TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 )
+TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled, 0 )
+TCL_CHECK( tcl_eyespace_or_lighting_add4, ctx->_NeedEyeCoords || ctx->Light.Enabled, 4 )
+TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled, 0 )
+TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled, 0 )
+TCL_CHECK( tcl_lit2, ctx->Light.Enabled && ctx->Light.Light[2].Enabled, 0 )
+TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled, 0 )
+TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled, 0 )
+TCL_CHECK( tcl_lit5, ctx->Light.Enabled && ctx->Light.Light[5].Enabled, 0 )
+TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled, 0 )
+TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled, 0 )
+TCL_CHECK( tcl_lit0_add6, ctx->Light.Enabled && ctx->Light.Light[0].Enabled, 6 )
+TCL_CHECK( tcl_lit1_add6, ctx->Light.Enabled && ctx->Light.Light[1].Enabled, 6 )
+TCL_CHECK( tcl_lit2_add6, ctx->Light.Enabled && ctx->Light.Light[2].Enabled, 6 )
+TCL_CHECK( tcl_lit3_add6, ctx->Light.Enabled && ctx->Light.Light[3].Enabled, 6 )
+TCL_CHECK( tcl_lit4_add6, ctx->Light.Enabled && ctx->Light.Light[4].Enabled, 6 )
+TCL_CHECK( tcl_lit5_add6, ctx->Light.Enabled && ctx->Light.Light[5].Enabled, 6 )
+TCL_CHECK( tcl_lit6_add6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled, 6 )
+TCL_CHECK( tcl_lit7_add6, ctx->Light.Enabled && ctx->Light.Light[7].Enabled, 6 )
+TCL_CHECK( tcl_ucp0, (ctx->Transform.ClipPlanesEnabled & 0x1), 0 )
+TCL_CHECK( tcl_ucp1, (ctx->Transform.ClipPlanesEnabled & 0x2), 0 )
+TCL_CHECK( tcl_ucp2, (ctx->Transform.ClipPlanesEnabled & 0x4), 0 )
+TCL_CHECK( tcl_ucp3, (ctx->Transform.ClipPlanesEnabled & 0x8), 0 )
+TCL_CHECK( tcl_ucp4, (ctx->Transform.ClipPlanesEnabled & 0x10), 0 )
+TCL_CHECK( tcl_ucp5, (ctx->Transform.ClipPlanesEnabled & 0x20), 0 )
+TCL_CHECK( tcl_ucp0_add4, (ctx->Transform.ClipPlanesEnabled & 0x1), 4 )
+TCL_CHECK( tcl_ucp1_add4, (ctx->Transform.ClipPlanesEnabled & 0x2), 4 )
+TCL_CHECK( tcl_ucp2_add4, (ctx->Transform.ClipPlanesEnabled & 0x4), 4 )
+TCL_CHECK( tcl_ucp3_add4, (ctx->Transform.ClipPlanesEnabled & 0x8), 4 )
+TCL_CHECK( tcl_ucp4_add4, (ctx->Transform.ClipPlanesEnabled & 0x10), 4 )
+TCL_CHECK( tcl_ucp5_add4, (ctx->Transform.ClipPlanesEnabled & 0x20), 4 )
+TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled, 0 )
+TCL_CHECK( tcl_eyespace_or_fog_add4, ctx->_NeedEyeCoords || ctx->Fog.Enabled, 4 )
+
+CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )
+CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )
+CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )
+
+#define OUT_VEC(hdr, data) do {			\
+    drm_radeon_cmd_header_t h;					\
+    h.i = hdr;								\
+    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
+    OUT_BATCH(0);							\
+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
+    OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1));	\
+    OUT_BATCH_TABLE((data), h.vectors.count);				\
+  } while(0)
+
+#define OUT_SCL(hdr, data) do {					\
+    drm_radeon_cmd_header_t h;						\
+    h.i = hdr;								\
+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
+    OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1));	\
+    OUT_BATCH_TABLE((data), h.scalars.count);				\
+  } while(0)
+
+static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&r100->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
+   
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   OUT_SCL(atom->cmd[0], atom->cmd+1);
+   END_BATCH();
+}
 
 
+static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&r100->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
 
-/* Initialize the context's hardware state.
- */
-void radeonInitState( radeonContextPtr rmesa )
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   OUT_VEC(atom->cmd[0], atom->cmd+1);
+   END_BATCH();
+}
+
+
+static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)
 {
-   GLcontext *ctx = rmesa->glCtx;
-   GLuint color_fmt, depth_fmt, i;
-   GLint drawPitch, drawOffset;
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&r100->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
+
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
+   OUT_SCL(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
+   END_BATCH();
+}
 
-   switch ( rmesa->radeonScreen->cpp ) {
-   case 2:
-      color_fmt = RADEON_COLOR_FORMAT_RGB565;
-      break;
-   case 4:
-      color_fmt = RADEON_COLOR_FORMAT_ARGB8888;
-      break;
-   default:
-      fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
-      exit( -1 );
+static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&r100->radeon);
+   struct radeon_renderbuffer *rrb;
+   uint32_t cbpitch;
+   uint32_t zbpitch, depth_fmt;
+   uint32_t dwords = atom->check(ctx, atom);
+
+   /* output the first 7 bytes of context */
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   OUT_BATCH_TABLE(atom->cmd, 5);
+
+   rrb = radeon_get_depthbuffer(&r100->radeon);
+   if (!rrb) {
+     OUT_BATCH(0);
+     OUT_BATCH(0);
+   } else {
+     zbpitch = (rrb->pitch / rrb->cpp);
+     if (r100->using_hyperz)
+       zbpitch |= RADEON_DEPTH_HYPERZ;
+
+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+     OUT_BATCH(zbpitch);
+     if (rrb->cpp == 4)
+        depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+     else
+        depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
+   }
+     
+   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
+   OUT_BATCH(atom->cmd[CTX_CMD_1]);
+   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
+
+   rrb = radeon_get_colorbuffer(&r100->radeon);
+   if (!rrb || !rrb->bo) {
+      OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
+      OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]);
+   } else {
+      atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
+      if (rrb->cpp == 4)
+         atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
+      else
+         atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
+
+      OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
+      OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
    }
 
-   rmesa->state.color.clear = 0x00000000;
+   OUT_BATCH(atom->cmd[CTX_CMD_2]);
+
+   if (!rrb || !rrb->bo) {
+     OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]);
+   } else {
+     cbpitch = (rrb->pitch / rrb->cpp);
+     if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+       cbpitch |= RADEON_COLOR_TILE_ENABLE;
+     OUT_BATCH(cbpitch);
+   }
+
+   END_BATCH();
+}
+
+static int check_always_ctx( GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   struct radeon_renderbuffer *rrb, *drb;
+   uint32_t dwords;
+
+   rrb = radeon_get_colorbuffer(&r100->radeon);
+   if (!rrb || !rrb->bo) {
+      return 0;
+   }
+
+   drb = radeon_get_depthbuffer(&r100->radeon);
+
+   dwords = 10;
+   if (drb)
+     dwords += 6;
+   if (rrb)
+     dwords += 8;
+
+   return dwords;
+}
+
+static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&r100->radeon);
+   struct radeon_renderbuffer *rrb, *drb;
+   uint32_t cbpitch = 0;
+   uint32_t zbpitch = 0;
+   uint32_t dwords = atom->check(ctx, atom);
+   uint32_t depth_fmt;
+
+   rrb = radeon_get_colorbuffer(&r100->radeon);
+   if (!rrb || !rrb->bo) {
+      fprintf(stderr, "no rrb\n");
+      return;
+   }
+
+   atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
+   if (rrb->cpp == 4)
+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
+   else switch (rrb->base._ActualFormat) {
+   case GL_RGB5:
+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
+	break;
+   case GL_RGBA4:
+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
+	break;
+   case GL_RGB5_A1:
+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
+	break;
+   }
+
+   cbpitch = (rrb->pitch / rrb->cpp);
+   if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+       cbpitch |= R200_COLOR_TILE_ENABLE;
+
+   drb = radeon_get_depthbuffer(&r100->radeon);
+   if (drb) {
+     zbpitch = (drb->pitch / drb->cpp);
+     if (drb->cpp == 4)
+        depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+     else
+        depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
+     
+   }
+
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+
+   /* In the CS case we need to split this up */
+   OUT_BATCH(CP_PACKET0(packet[0].start, 3));
+   OUT_BATCH_TABLE((atom->cmd + 1), 4);
+
+   if (drb) {
+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
+     OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
+     OUT_BATCH(zbpitch);
+   }
+
+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
+   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
+   OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
+   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
+   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
+
+   if (rrb) {
+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
+     OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+   }
+
+   // if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
+   //   OUT_BATCH_TABLE((atom->cmd + 14), 4);
+   // }
+
+   END_BATCH();
+   BEGIN_BATCH_NO_AUTOSTATE(4);
+   OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
+   OUT_BATCH(0);
+   OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
+   if (rrb) {
+       OUT_BATCH(((rrb->base.Width - 1) << RADEON_RE_WIDTH_SHIFT) |
+                 ((rrb->base.Height - 1) << RADEON_RE_HEIGHT_SHIFT));
+   } else {
+       OUT_BATCH(0);
+   }
+   END_BATCH();
+}
+
+static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&r100->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
+   int i = atom->idx, j;
+   radeonTexObj *t = r100->state.texture.unit[i].texobj;
+   radeon_mipmap_level *lvl;
+
+   if (!(ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT))
+	return;
+
+   if (!t)
+	return;
+
+   if (!t->mt)
+	return;
+
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   OUT_BATCH_TABLE(atom->cmd, 3);
+   lvl = &t->mt->levels[0];
+   for (j = 0; j < 5; j++) {
+	OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
+			RADEON_GEM_DOMAIN_VRAM, 0, 0);
+   }
+   END_BATCH();
+}
+
+static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&r100->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
+   int i = atom->idx, j;
+   radeonTexObj *t = r100->state.texture.unit[i].texobj;
+   radeon_mipmap_level *lvl;
+   uint32_t base_reg;
+
+   if (!(ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT))
+	return;
+
+   if (!t)
+	return;
+
+   if (!t->mt)
+	return;
+
+   switch(i) {
+	case 1: base_reg = RADEON_PP_CUBIC_OFFSET_T1_0; break;
+	case 2: base_reg = RADEON_PP_CUBIC_OFFSET_T2_0; break;
+	default:
+	case 0: base_reg = RADEON_PP_CUBIC_OFFSET_T0_0; break;
+   };
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   OUT_BATCH_TABLE(atom->cmd, 2);
+   lvl = &t->mt->levels[0];
+   for (j = 0; j < 5; j++) {
+	OUT_BATCH(CP_PACKET0(base_reg + (4 * j), 0));
+	OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
+			RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+   }
+   END_BATCH();
+}
+
+static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&r100->radeon);
+   uint32_t dwords = atom->cmd_size;
+   int i = atom->idx;
+   radeonTexObj *t = r100->state.texture.unit[i].texobj;
+   radeon_mipmap_level *lvl;
+
+   if (t && t->mt && !t->image_override)
+     dwords += 2;
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+
+   OUT_BATCH_TABLE(atom->cmd, 3);
+   if (t && t->mt && !t->image_override) {
+     if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
+   	lvl = &t->mt->levels[0];
+	OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
+			RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+     } else {
+        OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
+		     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+     }
+   } else if (!t) {
+     /* workaround for old CS mechanism */
+     OUT_BATCH(r100->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]);
+     //     OUT_BATCH(r100->radeon.radeonScreen);
+   } else {
+     OUT_BATCH(t->override_offset);
+   }
+
+   OUT_BATCH_TABLE((atom->cmd+4), 5);
+   END_BATCH();
+}
+
+static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&r100->radeon);
+   uint32_t dwords = atom->cmd_size;
+   int i = atom->idx;
+   radeonTexObj *t = r100->state.texture.unit[i].texobj;
+   radeon_mipmap_level *lvl;
+   int hastexture = 1;
+
+   if (!t)
+	hastexture = 0;
+   else {
+	if (!t->mt && !t->bo)
+		hastexture = 0;
+   }
+   dwords += 1;
+   if (hastexture)
+     dwords += 2;
+   else
+     dwords -= 2;
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+
+   OUT_BATCH(CP_PACKET0(RADEON_PP_TXFILTER_0 + (24 * i), 1));
+   OUT_BATCH_TABLE((atom->cmd + 1), 2);
+
+   if (hastexture) {
+     OUT_BATCH(CP_PACKET0(RADEON_PP_TXOFFSET_0 + (24 * i), 0));
+     if (t->mt && !t->image_override) {
+        if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
+            lvl = &t->mt->levels[0];
+	    OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
+			RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+        } else {
+           OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
+		     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+        }
+      } else {
+	if (t->bo)
+            OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
+                            RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+      }
+   }
+
+   OUT_BATCH(CP_PACKET0(RADEON_PP_TXCBLEND_0 + (i * 24), 1));
+   OUT_BATCH_TABLE((atom->cmd+4), 2);
+   OUT_BATCH(CP_PACKET0(RADEON_PP_BORDER_COLOR_0 + (i * 4), 0));
+   OUT_BATCH((atom->cmd[TEX_PP_BORDER_COLOR]));
+   END_BATCH();
+}
+
+/* Initialize the context's hardware state.
+ */
+void radeonInitState( r100ContextPtr rmesa )
+{
+   GLcontext *ctx = rmesa->radeon.glCtx;
+   GLuint i;
+
+   rmesa->radeon.state.color.clear = 0x00000000;
 
    switch ( ctx->Visual.depthBits ) {
    case 16:
-      rmesa->state.depth.clear = 0x0000ffff;
-      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
-      depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
-      rmesa->state.stencil.clear = 0x00000000;
+      rmesa->radeon.state.depth.clear = 0x0000ffff;
+      rmesa->radeon.state.stencil.clear = 0x00000000;
       break;
    case 24:
-      rmesa->state.depth.clear = 0x00ffffff;
-      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
-      depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
-      rmesa->state.stencil.clear = 0xffff0000;
+      rmesa->radeon.state.depth.clear = 0x00ffffff;
+      rmesa->radeon.state.stencil.clear = 0xffff0000;
       break;
    default:
-      fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
-	       ctx->Visual.depthBits );
-      exit( -1 );
+      break;
    }
 
-   /* Only have hw stencil when depth buffer is 24 bits deep */
-   rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
-				     ctx->Visual.depthBits == 24 );
+   rmesa->radeon.Fallback = 0;
 
-   rmesa->Fallback = 0;
 
-   if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
-      drawOffset = rmesa->radeonScreen->backOffset;
-      drawPitch  = rmesa->radeonScreen->backPitch;
-   } else {
-      drawOffset = rmesa->radeonScreen->frontOffset;
-      drawPitch  = rmesa->radeonScreen->frontPitch;
-   }
+   rmesa->radeon.hw.max_state_size = 0;
 
-   rmesa->hw.max_state_size = 0;
-
-#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG )				\
+#define ALLOC_STATE_IDX( ATOM, CHK, SZ, NM, FLAG, IDX )		\
    do {								\
       rmesa->hw.ATOM.cmd_size = SZ;				\
-      rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int));	\
-      rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int));	\
-      rmesa->hw.ATOM.name = NM;					\
+      rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
+      rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int)); \
+      rmesa->hw.ATOM.name = NM;						\
       rmesa->hw.ATOM.is_tcl = FLAG;					\
       rmesa->hw.ATOM.check = check_##CHK;				\
-      rmesa->hw.ATOM.dirty = GL_TRUE;				\
-      rmesa->hw.max_state_size += SZ * sizeof(int);		\
+      rmesa->hw.ATOM.dirty = GL_TRUE;					\
+      rmesa->hw.ATOM.idx = IDX;					\
+      rmesa->radeon.hw.max_state_size += SZ * sizeof(int);		\
    } while (0)
-      
-      
+
+#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG )		\
+   ALLOC_STATE_IDX(ATOM, CHK, SZ, NM, FLAG, 0)
+
    /* Allocate state buffers:
     */
-   ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 );
+   ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE, "CTX/context", 0 );
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+     rmesa->hw.ctx.emit = ctx_emit_cs;
+     rmesa->hw.ctx.check = check_always_ctx;
+   } else
+     rmesa->hw.ctx.emit = ctx_emit;
    ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
    ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
    ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
@@ -229,82 +723,133 @@ void radeonInitState( radeonContextPtr rmesa )
    ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
    ALLOC_STATE( tcl, always, TCL_STATE_SIZE, "TCL/tcl", 1 );
    ALLOC_STATE( mtl, tcl_lighting, MTL_STATE_SIZE, "MTL/material", 1 );
-   ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 );
-   ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 );
-   ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 );
-   ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
-   ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 );
-   ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 );
-   ALLOC_STATE( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0 );
-   if (rmesa->radeonScreen->drmSupportsCubeMapsR100)
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+      ALLOC_STATE( grd, always_add2, GRD_STATE_SIZE, "GRD/guard-band", 1 );
+      ALLOC_STATE( fog, fog_add4, FOG_STATE_SIZE, "FOG/fog", 1 );
+      ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 1 );
+      ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
+      ALLOC_STATE_IDX( tex[0], tex0_mm, TEX_STATE_SIZE, "TEX/tex-0", 0, 0);
+      ALLOC_STATE_IDX( tex[1], tex1_mm, TEX_STATE_SIZE, "TEX/tex-1", 0, 1);
+      ALLOC_STATE_IDX( tex[2], tex2_mm, TEX_STATE_SIZE, "TEX/tex-2", 0, 2);
+      ALLOC_STATE( mat[0], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 1 );
+      ALLOC_STATE( mat[1], tcl_eyespace_or_fog_add4, MAT_STATE_SIZE, "MAT/modelview", 1 );
+      ALLOC_STATE( mat[2], tcl_eyespace_or_lighting_add4, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
+      ALLOC_STATE( mat[3], tcl_tex0_add4, MAT_STATE_SIZE, "MAT/texmat0", 1 );
+      ALLOC_STATE( mat[4], tcl_tex1_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+      ALLOC_STATE( mat[5], tcl_tex2_add4, MAT_STATE_SIZE, "MAT/texmat2", 1 );
+      ALLOC_STATE( lit[0], tcl_lit0_add6, LIT_STATE_SIZE, "LIT/light-0", 1 );
+      ALLOC_STATE( lit[1], tcl_lit1_add6, LIT_STATE_SIZE, "LIT/light-1", 1 );
+      ALLOC_STATE( lit[2], tcl_lit2_add6, LIT_STATE_SIZE, "LIT/light-2", 1 );
+      ALLOC_STATE( lit[3], tcl_lit3_add6, LIT_STATE_SIZE, "LIT/light-3", 1 );
+      ALLOC_STATE( lit[4], tcl_lit4_add6, LIT_STATE_SIZE, "LIT/light-4", 1 );
+      ALLOC_STATE( lit[5], tcl_lit5_add6, LIT_STATE_SIZE, "LIT/light-5", 1 );
+      ALLOC_STATE( lit[6], tcl_lit6_add6, LIT_STATE_SIZE, "LIT/light-6", 1 );
+      ALLOC_STATE( lit[7], tcl_lit7_add6, LIT_STATE_SIZE, "LIT/light-7", 1 );
+      ALLOC_STATE( ucp[0], tcl_ucp0_add4, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
+      ALLOC_STATE( ucp[1], tcl_ucp1_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+      ALLOC_STATE( ucp[2], tcl_ucp2_add4, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
+      ALLOC_STATE( ucp[3], tcl_ucp3_add4, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
+      ALLOC_STATE( ucp[4], tcl_ucp4_add4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
+      ALLOC_STATE( ucp[5], tcl_ucp5_add4, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
+   } else {
+      ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 );
+      ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 );
+      ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 );
+      ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
+      ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0);
+      ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1);
+      ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2);
+      ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
+      ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
+      ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
+      ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 );
+      ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+      ALLOC_STATE( mat[5], tcl_tex2, MAT_STATE_SIZE, "MAT/texmat2", 1 );
+      ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 );
+      ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 );
+      ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 );
+      ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 );
+      ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 );
+      ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
+      ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
+      ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
+      ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
+      ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+      ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
+      ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
+      ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
+      ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
+   }
+
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+       ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 );
+   }
+   
+   for (i = 0; i < 3; i++) {
+      if (rmesa->radeon.radeonScreen->kernel_mm)
+          rmesa->hw.tex[i].emit = tex_emit_cs;
+      else
+          rmesa->hw.tex[i].emit = tex_emit;
+   }
+   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
    {
-      ALLOC_STATE( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0 );
-      ALLOC_STATE( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0 );
-      ALLOC_STATE( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0 );
+      if (rmesa->radeon.radeonScreen->kernel_mm) {
+         ALLOC_STATE_IDX( cube[0], cube0_mm, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
+         ALLOC_STATE_IDX( cube[1], cube1_mm, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
+         ALLOC_STATE_IDX( cube[2], cube2_mm, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
+         for (i = 0; i < 3; i++)
+            rmesa->hw.cube[i].emit = cube_emit_cs;
+      } else {
+         ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
+         ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
+         ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
+         for (i = 0; i < 3; i++)
+            rmesa->hw.cube[i].emit = cube_emit;
+      }
    }
    else
    {
-      ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0 );
-      ALLOC_STATE( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0 );
-      ALLOC_STATE( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0 );
-   }
-   ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
-   ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
-   ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
-   ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 );
-   ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 );
-   ALLOC_STATE( mat[5], tcl_tex2, MAT_STATE_SIZE, "MAT/texmat2", 1 );
-   ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
-   ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
-   ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
-   ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
-   ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
-   ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
-   ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 );
-   ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 );
-   ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 );
-   ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 );
-   ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 );
-   ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
-   ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
-   ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
-   ALLOC_STATE( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0 );
-   ALLOC_STATE( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0 );
-   ALLOC_STATE( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0 );
+      ALLOC_STATE_IDX( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
+      ALLOC_STATE_IDX( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
+      ALLOC_STATE_IDX( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
+   }
+   ALLOC_STATE_IDX( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0, 0 );
+   ALLOC_STATE_IDX( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0, 1 );
+   ALLOC_STATE_IDX( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0, 2 );
 
    radeonSetUpAtomList( rmesa );
 
    /* Fill in the packet headers:
     */
-   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
-   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
-   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
-   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
-   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
-   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
-   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
-   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
-   rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(RADEON_EMIT_SE_CNTL_STATUS);
-   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
-   rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_0);
-   rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0);
-   rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1);
-   rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1);
-   rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_2);
-   rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_2);
-   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_0);
-   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
-   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_1);
-   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
-   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_2);
-   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T2);
-   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
-   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
+   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
+   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
+   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
+   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
+   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
+   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
+   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
+   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
+   rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL_STATUS);
+   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
+   rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_0);
+   rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_0);
+   rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_1);
+   rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_1);
+   rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_2);
+   rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_2);
+   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_0);
+   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
+   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_1);
+   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
+   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_2);
+   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T2);
+   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
+   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
    rmesa->hw.mtl.cmd[MTL_CMD_0] = 
-      cmdpkt(RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
-   rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_0);
-   rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_1);
-   rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_2);
+      cmdpkt(rmesa, RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
+   rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_0);
+   rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_1);
+   rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_2);
    rmesa->hw.grd.cmd[GRD_CMD_0] = 
       cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
    rmesa->hw.fog.cmd[FOG_CMD_0] = 
@@ -331,6 +876,26 @@ void radeonInitState( radeonContextPtr rmesa )
 	 cmdvec( RADEON_VS_UCP_ADDR + i, 1, 4 );
    }
 
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+      rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0);
+      rmesa->hw.stp.cmd[STP_DATA_0] = 0;
+      rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31);
+
+      rmesa->hw.grd.emit = scl_emit;
+      rmesa->hw.fog.emit = vec_emit;
+      rmesa->hw.glt.emit = vec_emit;
+      rmesa->hw.eye.emit = vec_emit;
+      
+      for (i = 0; i < 6; i++)
+	 rmesa->hw.mat[i].emit = vec_emit;
+
+      for (i = 0; i < 8; i++)
+	 rmesa->hw.lit[i].emit = lit_emit;
+
+      for (i = 0; i < 6; i++)
+	 rmesa->hw.ucp[i].emit = vec_emit;
+   }
+
    rmesa->last_ReallyEnabled = -1;
 
    /* Initial Harware state:
@@ -352,19 +917,7 @@ void radeonInitState( radeonContextPtr rmesa )
 					    RADEON_SRC_BLEND_GL_ONE |
 					    RADEON_DST_BLEND_GL_ZERO );
 
-   rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
-      rmesa->radeonScreen->depthOffset + rmesa->radeonScreen->fbLocation;
-
-   rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = 
-      ((rmesa->radeonScreen->depthPitch &
-	RADEON_DEPTHPITCH_MASK) |
-       RADEON_DEPTH_ENDIAN_NO_SWAP);
-       
-   if (rmesa->using_hyperz)
-       rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= RADEON_DEPTH_HYPERZ;
-
-   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt |
-					       RADEON_Z_TEST_LESS |
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (RADEON_Z_TEST_LESS |
 					       RADEON_STENCIL_TEST_ALWAYS |
 					       RADEON_STENCIL_FAIL_KEEP |
 					       RADEON_STENCIL_ZPASS_KEEP |
@@ -374,7 +927,7 @@ void radeonInitState( radeonContextPtr rmesa )
    if (rmesa->using_hyperz) {
        rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_COMPRESSION_ENABLE |
 						   RADEON_Z_DECOMPRESSION_ENABLE;
-      if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+      if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
 	 /* works for q3, but slight rendering errors with glxgears ? */
 /*	 rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
 	 /* need this otherwise get lots of lockups with q3 ??? */
@@ -386,10 +939,9 @@ void radeonInitState( radeonContextPtr rmesa )
 				     RADEON_ANTI_ALIAS_NONE);
 
    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = (RADEON_PLANE_MASK_ENABLE |
-				       color_fmt |
 				       RADEON_ZBLOCK16);
 
-   switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) {
+   switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
    case DRI_CONF_DITHER_XERRORDIFFRESET:
       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_INIT;
       break;
@@ -397,30 +949,17 @@ void radeonInitState( radeonContextPtr rmesa )
       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_SCALE_DITHER_ENABLE;
       break;
    }
-   if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) ==
+   if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
 	DRI_CONF_ROUND_ROUND )
-      rmesa->state.color.roundEnable = RADEON_ROUND_ENABLE;
+      rmesa->radeon.state.color.roundEnable = RADEON_ROUND_ENABLE;
    else
-      rmesa->state.color.roundEnable = 0;
-   if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) ==
+      rmesa->radeon.state.color.roundEnable = 0;
+   if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
 	DRI_CONF_COLOR_REDUCTION_DITHER )
       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE;
    else
-      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
-
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset +
-					       rmesa->radeonScreen->fbLocation)
-					      & RADEON_COLOROFFSET_MASK);
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
 
-   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch &
-					      RADEON_COLORPITCH_MASK) |
-					     RADEON_COLOR_ENDIAN_NO_SWAP);
-
-
-   /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */
-   if (rmesa->sarea->tiling_enabled) {
-      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
-   }
 
    rmesa->hw.set.cmd[SET_SE_CNTL] = (RADEON_FFACE_CULL_CCW |
 				     RADEON_BFACE_SOLID |
@@ -444,7 +983,7 @@ void radeonInitState( radeonContextPtr rmesa )
   					    RADEON_VC_NO_SWAP;
 #endif
 
-   if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+   if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
      rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] |= RADEON_TCL_BYPASS;
    }
 
@@ -491,8 +1030,8 @@ void radeonInitState( radeonContextPtr rmesa )
 	   (2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
 
       /* Initialize the texture offset to the start of the card texture heap */
-      rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
-	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+      //      rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
+      //	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
 
       rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
       rmesa->hw.tex[i].cmd[TEX_PP_TXCBLEND] =  
@@ -513,15 +1052,15 @@ void radeonInitState( radeonContextPtr rmesa )
 
       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_0] =
-	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_1] =
-	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_2] =
-	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_3] =
-	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_4] =
-	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
    }
 
    /* Can only add ST1 at the time of doing some multitex but can keep
@@ -612,6 +1151,14 @@ void radeonInitState( radeonContextPtr rmesa )
    rmesa->hw.eye.cmd[EYE_Y] = 0;
    rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
    rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
-   
-   rmesa->hw.all_dirty = GL_TRUE;
+
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+      radeon_init_query_stateobj(&rmesa->radeon, R100_QUERYOBJ_CMDSIZE);
+      rmesa->radeon.query.queryobj.cmd[R100_QUERYOBJ_CMD_0] = CP_PACKET0(RADEON_RB3D_ZPASS_DATA, 0);
+      rmesa->radeon.query.queryobj.cmd[R100_QUERYOBJ_DATA_0] = 0;
+   }
+     
+   rmesa->radeon.hw.all_dirty = GL_TRUE;
+
+   rcommonInitCmdBuf(&rmesa->radeon);
 }
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index ebea1fecdca..e61f59eaeaf 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/enums.h"
 #include "main/imports.h"
 #include "main/macros.h"
+#include "main/simple_list.h"
 
 #include "swrast_setup/swrast_setup.h"
 #include "math/m_translate.h"
@@ -50,10 +51,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_state.h"
 #include "radeon_swtcl.h"
 #include "radeon_tcl.h"
+#include "radeon_debug.h"
 
 
-static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
-
 /* R100: xyzw, c0, c1/fog, stq[0..2]  = 4+1+1+3*3 = 15  right? */
 /* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */
 #define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat))	/* for mesa _tnl stage */
@@ -64,18 +64,18 @@ static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
 
 #define EMIT_ATTR( ATTR, STYLE, F0 )					\
 do {									\
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR);	\
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE);	\
-   rmesa->swtcl.vertex_attr_count++;					\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);	\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);	\
+   rmesa->radeon.swtcl.vertex_attr_count++;					\
    fmt_0 |= F0;								\
 } while (0)
 
 #define EMIT_PAD( N )							\
 do {									\
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0;		\
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD;	\
-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N);		\
-   rmesa->swtcl.vertex_attr_count++;					\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;		\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;	\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);		\
+   rmesa->radeon.swtcl.vertex_attr_count++;					\
 } while (0)
 
 static GLuint radeon_cp_vc_frmts[3][2] =
@@ -87,7 +87,7 @@ static GLuint radeon_cp_vc_frmts[3][2] =
 
 static void radeonSetVertexFormat( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
    DECLARE_RENDERINPUTS(index_bitset);
@@ -106,7 +106,7 @@ static void radeonSetVertexFormat( GLcontext *ctx )
    }
 
    assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
-   rmesa->swtcl.vertex_attr_count = 0;
+   rmesa->radeon.swtcl.vertex_attr_count = 0;
 
    /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
     * build up a hardware vertex.
@@ -204,33 +204,52 @@ static void radeonSetVertexFormat( GLcontext *ctx )
       }
    }
 
-   if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) ||
+   if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) ||
 	fmt_0 != rmesa->swtcl.vertex_format) {
       RADEON_NEWPRIM(rmesa);
       rmesa->swtcl.vertex_format = fmt_0;
-      rmesa->swtcl.vertex_size =
+      rmesa->radeon.swtcl.vertex_size =
 	  _tnl_install_attrs( ctx,
-			      rmesa->swtcl.vertex_attrs, 
-			      rmesa->swtcl.vertex_attr_count,
+			      rmesa->radeon.swtcl.vertex_attrs, 
+			      rmesa->radeon.swtcl.vertex_attr_count,
 			      NULL, 0 );
-      rmesa->swtcl.vertex_size /= 4;
-      RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
-      if (RADEON_DEBUG & DEBUG_VERTS)
-	 fprintf( stderr, "%s: vertex_size= %d floats\n",
-		  __FUNCTION__, rmesa->swtcl.vertex_size);
+      rmesa->radeon.swtcl.vertex_size /= 4;
+      RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
+      radeon_print(RADEON_SWRENDER, RADEON_VERBOSE,
+	  "%s: vertex_size= %d floats\n",  __FUNCTION__, rmesa->radeon.swtcl.vertex_size);
    }
 }
 
+static void radeon_predict_emit_size( r100ContextPtr rmesa )
+{
+
+    if (!rmesa->radeon.swtcl.emit_prediction) {
+        const int state_size = radeonCountStateEmitSize( &rmesa->radeon );
+        const int scissor_size = 8;
+        const int prims_size = 8;
+        const int vertex_size = 7;
+
+        if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
+                    state_size +
+                    (scissor_size + prims_size + vertex_size),
+                    __FUNCTION__))
+            rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize( &rmesa->radeon );
+        else
+            rmesa->radeon.swtcl.emit_prediction = state_size;
+        rmesa->radeon.swtcl.emit_prediction += scissor_size + prims_size + vertex_size
+            + rmesa->radeon.cmdbuf.cs->cdw;
+    }
+}
 
 static void radeonRenderStart( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+    r100ContextPtr rmesa = R100_CONTEXT( ctx );
 
-   radeonSetVertexFormat( ctx );
-   
-   if (rmesa->dma.flush != 0 && 
-       rmesa->dma.flush != flush_last_swtcl_prim)
-      rmesa->dma.flush( rmesa );
+    radeonSetVertexFormat( ctx );
+
+    if (rmesa->radeon.dma.flush != 0 &&
+            rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim)
+        rmesa->radeon.dma.flush( ctx );
 }
 
 
@@ -241,7 +260,7 @@ static void radeonRenderStart( GLcontext *ctx )
  */
 void radeonChooseVertexState( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
    TNLcontext *tnl = TNL_CONTEXT(ctx);
 
    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
@@ -254,7 +273,7 @@ void radeonChooseVertexState( GLcontext *ctx )
     * rasterization fallback.  As this function will be called again when we
     * leave a rasterization fallback, we can just skip it for now.
     */
-   if (rmesa->Fallback != 0)
+   if (rmesa->radeon.Fallback != 0)
       return;
 
    /* HW perspective divide is a win, but tiny vertex formats are a
@@ -281,80 +300,33 @@ void radeonChooseVertexState( GLcontext *ctx )
    }
 }
 
-
-/* Flush vertices in the current dma region.
- */
-static void flush_last_swtcl_prim( radeonContextPtr rmesa  )
+void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
 {
-   if (RADEON_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
-   rmesa->dma.flush = NULL;
 
-   if (rmesa->dma.current.buf) {
-      struct radeon_dma_region *current = &rmesa->dma.current;
-      GLuint current_offset = (rmesa->radeonScreen->gart_buffer_offset +
-			       current->buf->buf->idx * RADEON_BUFFER_SIZE + 
-			       current->start);
 
-      assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+   radeonEmitState(&rmesa->radeon);
+   radeonEmitVertexAOS( rmesa,
+			rmesa->radeon.swtcl.vertex_size,
+			first_elem(&rmesa->radeon.dma.reserved)->bo,
+			current_offset);
 
-      assert (current->start + 
-	      rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-	      current->ptr);
+		      
+   radeonEmitVbufPrim( rmesa,
+		       rmesa->swtcl.vertex_format,
+		       rmesa->radeon.swtcl.hw_primitive,
+		       rmesa->radeon.swtcl.numverts);
+   if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
+     WARN_ONCE("Rendering was %d commands larger than predicted size."
+	 " We might overflow  command buffer.\n",
+	 rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
 
-      if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
-	 radeonEnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ +
-			          rmesa->hw.max_state_size + VBUF_BUFSZ );
 
-	 radeonEmitVertexAOS( rmesa,
-			      rmesa->swtcl.vertex_size,
-			      current_offset);
+   rmesa->radeon.swtcl.emit_prediction = 0;
 
-	 radeonEmitVbufPrim( rmesa,
-			     rmesa->swtcl.vertex_format,
-			     rmesa->swtcl.hw_primitive,
-			     rmesa->swtcl.numverts);
-      }
-
-      rmesa->swtcl.numverts = 0;
-      current->start = current->ptr;
-   }
 }
 
-
-/* Alloc space in the current dma region.
- */
-static INLINE void *
-radeonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
-{
-   GLuint bytes = vsize * nverts;
-
-   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
-      radeonRefillCurrentDmaRegion( rmesa );
-
-   if (!rmesa->dma.flush) {
-      rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-      rmesa->dma.flush = flush_last_swtcl_prim;
-   }
-
-   assert( vsize == rmesa->swtcl.vertex_size * 4 );
-   assert( rmesa->dma.flush == flush_last_swtcl_prim );
-   assert (rmesa->dma.current.start + 
-	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
-	   rmesa->dma.current.ptr);
-
-
-   {
-      GLubyte *head = (GLubyte *)(rmesa->dma.current.address + rmesa->dma.current.ptr);
-      rmesa->dma.current.ptr += bytes;
-      rmesa->swtcl.numverts += nverts;
-      return head;
-   }
-
-}
-
-
 /*
  * Render unclipped vertex buffers by emitting vertices directly to
  * dma buffers.  Use strip/fan hardware primitives where possible.
@@ -387,22 +359,31 @@ static const GLuint hw_prim[GL_POLYGON+1] = {
 };
 
 static INLINE void
-radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
+radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim )
 {
    RADEON_NEWPRIM( rmesa );
-   rmesa->swtcl.hw_primitive = hw_prim[prim];
-   assert(rmesa->dma.current.ptr == rmesa->dma.current.start);
+   rmesa->radeon.swtcl.hw_primitive = hw_prim[prim];
+   //   assert(rmesa->radeon.dma.current.ptr == rmesa->radeon.dma.current.start);
+}
+
+static void* radeon_alloc_verts( r100ContextPtr rmesa , GLuint nr, GLuint size )
+{
+   void *rv;
+   do {
+     radeon_predict_emit_size( rmesa );
+     rv = rcommonAllocDmaLowVerts( &rmesa->radeon, nr, size );
+   } while (!rv);
+   return rv;
 }
 
-#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
 #define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
 #define FLUSH()  RADEON_NEWPRIM( rmesa )
-#define GET_CURRENT_VB_MAX_VERTS() \
-  (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
+#define GET_CURRENT_VB_MAX_VERTS()					10\
+//  (((int)rmesa->radeon.dma.current.end - (int)rmesa->radeon.dma.current.ptr) / (rmesa->radeon.swtcl.vertex_size*4))
 #define GET_SUBSEQUENT_VB_MAX_VERTS() \
-  ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4))
-#define ALLOC_VERTS( nr ) \
-  radeonAllocDmaLowVerts( rmesa, nr, rmesa->swtcl.vertex_size * 4 )
+  ((RADEON_BUFFER_SIZE) / (rmesa->radeon.swtcl.vertex_size*4))
+#define ALLOC_VERTS( nr ) radeon_alloc_verts( rmesa, nr, rmesa->radeon.swtcl.vertex_size * 4 )
 #define EMIT_VERTS( ctx, j, nr, buf ) \
   _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf)
 
@@ -418,16 +399,13 @@ radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
 static GLboolean radeon_run_render( GLcontext *ctx,
 				    struct tnl_pipeline_stage *stage )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
    tnl_render_func *tab = TAG(render_tab_verts);
    GLuint i;
 
-   if (rmesa->swtcl.indexed_verts.buf) 
-      RELEASE_ELT_VERTS();
-   	
-   if (rmesa->swtcl.RenderIndex != 0 ||   
+   if (rmesa->radeon.swtcl.RenderIndex != 0 ||   
        !radeon_dma_validate_render( ctx, VB ))
       return GL_TRUE;		
 
@@ -442,8 +420,8 @@ static GLboolean radeon_run_render( GLcontext *ctx,
       if (!length)
 	 continue;
 
-      if (RADEON_DEBUG & DEBUG_PRIMS)
-	 fprintf(stderr, "radeon_render.c: prim %s %d..%d\n", 
+      radeon_print(RADEON_SWRENDER, RADEON_NORMAL,
+	  "radeon_render.c: prim %s %d..%d\n",
 		 _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK), 
 		 start, start+length);
 
@@ -496,13 +474,13 @@ static void radeonResetLineStipple( GLcontext *ctx );
 
 #undef LOCAL_VARS
 #undef ALLOC_VERTS
-#define CTX_ARG radeonContextPtr rmesa
-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
-#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, (size) * 4 )
+#define CTX_ARG r100ContextPtr rmesa
+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) radeon_alloc_verts( rmesa, n, (size) * 4 )
 #undef LOCAL_VARS
 #define LOCAL_VARS						\
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
-   const char *radeonverts = (char *)rmesa->swtcl.verts;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);		\
+   const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;
 #define VERT(x) (radeonVertex *)(radeonverts + ((x) * (vertsize) * sizeof(int)))
 #define VERTEX radeonVertex 
 #undef TAG
@@ -560,7 +538,7 @@ static struct {
 #define VERT_Y(_v) _v->v.y
 #define VERT_Z(_v) _v->v.z
 #define AREA_IS_CCW( a ) (a < 0)
-#define GET_VERTEX(e) (rmesa->swtcl.verts + ((e) * rmesa->swtcl.vertex_size * sizeof(int)))
+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + ((e) * rmesa->radeon.swtcl.vertex_size * sizeof(int)))
 
 #define VERT_SET_RGBA( v, c )  					\
 do {								\
@@ -606,8 +584,8 @@ do {							\
 #undef INIT
 
 #define LOCAL_VARS(n)							\
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
-   GLuint color[n], spec[n];						\
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);			\
+   GLuint color[n] = {0}, spec[n] = {0};						\
    GLuint coloroffset = rmesa->swtcl.coloroffset;	\
    GLuint specoffset = rmesa->swtcl.specoffset;			\
    (void) color; (void) spec; (void) coloroffset; (void) specoffset;
@@ -617,7 +595,7 @@ do {							\
  ***********************************************************************/
 
 #define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] )
-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
 #undef TAG
 #define TAG(x) x
 #include "tnl_dd/t_dd_unfilled.h"
@@ -673,9 +651,9 @@ static void init_rast_tab( void )
 } while (0)
 #undef LOCAL_VARS
 #define LOCAL_VARS						\
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
-   const GLuint vertsize = rmesa->swtcl.vertex_size;		\
-   const char *radeonverts = (char *)rmesa->swtcl.verts;		\
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);		\
+   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;		\
+   const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;		\
    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
    const GLboolean stipple = ctx->Line.StippleFlag;		\
    (void) elt; (void) stipple;
@@ -700,17 +678,17 @@ static void init_rast_tab( void )
 void radeonChooseRenderState( GLcontext *ctx )
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLuint index = 0;
    GLuint flags = ctx->_TriangleCaps;
 
-   if (!rmesa->TclFallback || rmesa->Fallback) 
+   if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback) 
       return;
 
    if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT;
    if (flags & DD_TRI_UNFILLED)      index |= RADEON_UNFILLED_BIT;
 
-   if (index != rmesa->swtcl.RenderIndex) {
+   if (index != rmesa->radeon.swtcl.RenderIndex) {
       tnl->Driver.Render.Points = rast_tab[index].points;
       tnl->Driver.Render.Line = rast_tab[index].line;
       tnl->Driver.Render.ClippedLine = rast_tab[index].line;
@@ -727,7 +705,7 @@ void radeonChooseRenderState( GLcontext *ctx )
 	 tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
       }
 
-      rmesa->swtcl.RenderIndex = index;
+      rmesa->radeon.swtcl.RenderIndex = index;
    }
 }
 
@@ -739,18 +717,18 @@ void radeonChooseRenderState( GLcontext *ctx )
 
 static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
-   if (rmesa->swtcl.hw_primitive != hwprim) {
+   if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
       RADEON_NEWPRIM( rmesa );
-      rmesa->swtcl.hw_primitive = hwprim;
+      rmesa->radeon.swtcl.hw_primitive = hwprim;
    }
 }
 
 static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   rmesa->swtcl.render_primitive = prim;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   rmesa->radeon.swtcl.render_primitive = prim;
    if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) 
       radeonRasterPrimitive( ctx, reduced_hw_prim[prim] );
 }
@@ -761,7 +739,7 @@ static void radeonRenderFinish( GLcontext *ctx )
 
 static void radeonResetLineStipple( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    RADEON_STATECHANGE( rmesa, lin );
 }
 
@@ -795,25 +773,25 @@ static const char *getFallbackString(GLuint bit)
 
 void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
-   GLuint oldfallback = rmesa->Fallback;
+   GLuint oldfallback = rmesa->radeon.Fallback;
 
    if (mode) {
-      rmesa->Fallback |= bit;
+      rmesa->radeon.Fallback |= bit;
       if (oldfallback == 0) {
-	 RADEON_FIREVERTICES( rmesa );
+	 radeon_firevertices(&rmesa->radeon);
 	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE );
 	 _swsetup_Wakeup( ctx );
-	 rmesa->swtcl.RenderIndex = ~0;
-         if (RADEON_DEBUG & DEBUG_FALLBACKS) {
+	 rmesa->radeon.swtcl.RenderIndex = ~0;
+         if (RADEON_DEBUG & RADEON_FALLBACKS) {
             fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n",
                     bit, getFallbackString(bit));
          }
       }
    }
    else {
-      rmesa->Fallback &= ~bit;
+      rmesa->radeon.Fallback &= ~bit;
       if (oldfallback == bit) {
 	 _swrast_flush( ctx );
 	 tnl->Driver.Render.Start = radeonRenderStart;
@@ -826,18 +804,18 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
 
 	 tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
 	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE );
-	 if (rmesa->TclFallback) {
-	    /* These are already done if rmesa->TclFallback goes to
+	 if (rmesa->radeon.TclFallback) {
+	    /* These are already done if rmesa->radeon.TclFallback goes to
 	     * zero above. But not if it doesn't (RADEON_NO_TCL for
 	     * example?)
 	     */
 	    _tnl_invalidate_vertex_state( ctx, ~0 );
 	    _tnl_invalidate_vertices( ctx, ~0 );
-	    RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
+	    RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset );
 	    radeonChooseVertexState( ctx );
 	    radeonChooseRenderState( ctx );
 	 }
-         if (RADEON_DEBUG & DEBUG_FALLBACKS) {
+         if (RADEON_DEBUG & RADEON_FALLBACKS) {
             fprintf(stderr, "Radeon end rasterization fallback: 0x%x %s\n",
                     bit, getFallbackString(bit));
          }
@@ -853,13 +831,14 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
 void radeonInitSwtcl( GLcontext *ctx )
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    static int firsttime = 1;
 
    if (firsttime) {
       init_rast_tab();
       firsttime = 0;
    }
+   rmesa->radeon.swtcl.emit_prediction = 0;
 
    tnl->Driver.Render.Start = radeonRenderStart;
    tnl->Driver.Render.Finish = radeonRenderFinish;
@@ -872,18 +851,9 @@ void radeonInitSwtcl( GLcontext *ctx )
    _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
 		       RADEON_MAX_TNL_VERTEX_SIZE);
    
-   rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
-   rmesa->swtcl.RenderIndex = ~0;
-   rmesa->swtcl.render_primitive = GL_TRIANGLES;
-   rmesa->swtcl.hw_primitive = 0;
+   rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+   rmesa->radeon.swtcl.RenderIndex = ~0;
+   rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
+   rmesa->radeon.swtcl.hw_primitive = 0;
 }
 
-
-void radeonDestroySwtcl( GLcontext *ctx )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   if (rmesa->swtcl.indexed_verts.buf) 
-      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
-			      __FUNCTION__ );
-}
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.h b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
index e485052ad77..da89158eeb9 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.h
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
@@ -40,7 +40,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_context.h"
 
 extern void radeonInitSwtcl( GLcontext *ctx );
-extern void radeonDestroySwtcl( GLcontext *ctx );
 
 extern void radeonChooseRenderState( GLcontext *ctx );
 extern void radeonChooseVertexState( GLcontext *ctx );
@@ -63,5 +62,5 @@ extern void radeon_translate_vertex( GLcontext *ctx,
 
 extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v );
 
-
+extern void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
 #endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
index 779e9ae5df0..b334ea05e5b 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "tnl/tnl.h"
 #include "tnl/t_pipeline.h"
 
+#include "radeon_common.h"
 #include "radeon_context.h"
 #include "radeon_state.h"
 #include "radeon_ioctl.h"
@@ -49,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_tcl.h"
 #include "radeon_swtcl.h"
 #include "radeon_maos.h"
+#include "radeon_common_context.h"
 
 
 
@@ -104,7 +106,7 @@ static GLboolean discrete_prim[0x10] = {
 };
    
 
-#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
+#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
 #define ELT_TYPE  GLushort
 
 #define ELT_INIT(prim, hw_prim) \
@@ -125,7 +127,7 @@ static GLboolean discrete_prim[0x10] = {
 
 #define RESET_STIPPLE() do {			\
    RADEON_STATECHANGE( rmesa, lin );		\
-   radeonEmitState( rmesa );			\
+   radeonEmitState(&rmesa->radeon);			\
 } while (0)
 
 #define AUTO_STIPPLE( mode )  do {		\
@@ -136,31 +138,26 @@ static GLboolean discrete_prim[0x10] = {
    else						\
       rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
 	 ~RADEON_LINE_PATTERN_AUTO_RESET;	\
-   radeonEmitState( rmesa );			\
+   radeonEmitState(&rmesa->radeon);		\
 } while (0)
 
 
 
 #define ALLOC_ELTS(nr)	radeonAllocElts( rmesa, nr )
 
-static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr ) 
+static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) 
 {
-   if (rmesa->dma.flush)
-      rmesa->dma.flush( rmesa );
+      if (rmesa->radeon.dma.flush)
+	 rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
 
-   radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
-			   rmesa->hw.max_state_size + ELTS_BUFSZ(nr));
+      radeonEmitAOS( rmesa,
+		     rmesa->radeon.tcl.aos_count, 0 );
 
-   radeonEmitAOS( rmesa,
-		rmesa->tcl.aos_components,
-		rmesa->tcl.nr_aos_components, 0 );
-
-   return radeonAllocEltsOpenEnded( rmesa,
-				    rmesa->tcl.vertex_format, 
-				    rmesa->tcl.hw_primitive, nr );
+      return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
+				       rmesa->tcl.hw_primitive, nr );
 }
 
-#define CLOSE_ELTS()  RADEON_NEWPRIM( rmesa )
+#define CLOSE_ELTS() if (0)  RADEON_NEWPRIM( rmesa )
 
 
 
@@ -174,15 +171,11 @@ static void radeonEmitPrim( GLcontext *ctx,
 		       GLuint start, 
 		       GLuint count)	
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
    radeonTclPrimitive( ctx, prim, hwprim );
    
-   radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
-			    rmesa->hw.max_state_size + VBUF_BUFSZ );
-
    radeonEmitAOS( rmesa,
-		  rmesa->tcl.aos_components,
-		  rmesa->tcl.nr_aos_components,
+		  rmesa->radeon.tcl.aos_count,
 		  start );
    
    /* Why couldn't this packet have taken an offset param?
@@ -197,6 +190,8 @@ static void radeonEmitPrim( GLcontext *ctx,
    radeonEmitPrim( ctx, prim, hwprim, start, count );           \
    (void) rmesa; } while (0)
 
+#define MAX_CONVERSION_SIZE 40
+
 /* Try & join small primitives
  */
 #if 0
@@ -254,7 +249,7 @@ void radeonTclPrimitive( GLcontext *ctx,
 			 GLenum prim,
 			 int hw_prim )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLuint se_cntl;
    GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
 
@@ -361,6 +356,73 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
    }
 }
 
+/**
+ * Predict total emit size for next rendering operation so there is no flush in middle of rendering
+ * Prediction has to aim towards the best possible value that is worse than worst case scenario
+ */
+static GLuint radeonEnsureEmitSize( GLcontext * ctx , GLuint inputs )
+{
+  r100ContextPtr rmesa = R100_CONTEXT(ctx);
+  TNLcontext *tnl = TNL_CONTEXT(ctx);
+  struct vertex_buffer *VB = &tnl->vb;
+  GLuint space_required;
+  GLuint state_size;
+  GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */
+  int i;
+  /* list of flags that are allocating aos object */
+  const GLuint flags_to_check[] = {
+    VERT_BIT_NORMAL,
+    VERT_BIT_COLOR0,
+    VERT_BIT_COLOR1,
+    VERT_BIT_FOG
+  };
+  /* predict number of aos to emit */
+  for (i=0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i)
+  {
+    if (inputs & flags_to_check[i])
+      ++nr_aos;
+  }
+  for (i = 0; i < ctx->Const.MaxTextureUnits; ++i)
+  {
+    if (inputs & VERT_BIT_TEX(i))
+      ++nr_aos;
+  }
+
+  {
+    /* count the prediction for state size */
+    space_required = 0;
+    state_size = radeonCountStateEmitSize( &rmesa->radeon );
+    /* tcl may be changed in radeonEmitArrays so account for it if not dirty */
+    if (!rmesa->hw.tcl.dirty)
+      state_size += rmesa->hw.tcl.check( rmesa->radeon.glCtx, &rmesa->hw.tcl );
+    /* predict size for elements */
+    for (i = 0; i < VB->PrimitiveCount; ++i)
+    {
+      if (!VB->Primitive[i].count)
+	continue;
+      /* If primitive.count is less than MAX_CONVERSION_SIZE
+	 rendering code may decide convert to elts.
+	 In that case we have to make pessimistic prediction.
+	 and use larger of 2 paths. */
+      const GLuint elts = ELTS_BUFSZ(nr_aos);
+      const GLuint index = INDEX_BUFSZ;
+      const GLuint vbuf = VBUF_BUFSZ;
+      if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
+	  || vbuf > index + elts)
+	space_required += vbuf;
+      else
+	space_required += index + elts;
+      space_required += AOS_BUFSZ(nr_aos);
+    }
+    space_required += SCISSOR_BUFSZ;
+  }
+  /* flush the buffer in case we need more than is left. */
+  if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__))
+    return space_required + radeonCountStateEmitSize( &rmesa->radeon );
+  else
+    return space_required + state_size;
+}
+
 /**********************************************************************/
 /*                          Render pipeline stage                     */
 /**********************************************************************/
@@ -371,7 +433,7 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
 static GLboolean radeon_run_tcl_render( GLcontext *ctx,
 					struct tnl_pipeline_stage *stage )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
    GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
@@ -379,7 +441,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
 
    /* TODO: separate this from the swtnl pipeline 
     */
-   if (rmesa->TclFallback)
+   if (rmesa->radeon.TclFallback)
       return GL_TRUE;	/* fallback to software t&l */
 
    if (VB->Count == 0)
@@ -411,6 +473,8 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
    }
 
    radeonReleaseArrays( ctx, ~0 );
+   GLuint emit_end = radeonEnsureEmitSize( ctx, inputs )
+     + rmesa->radeon.cmdbuf.cs->cdw;
    radeonEmitArrays( ctx, inputs );
 
    rmesa->tcl.Elts = VB->Elts;
@@ -430,6 +494,10 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
 	 radeonEmitPrimitive( ctx, start, start+length, prim );
    }
 
+   if (emit_end < rmesa->radeon.cmdbuf.cs->cdw)
+      WARN_ONCE("Rendering was %d commands larger than predicted size."
+	  " We might overflow  command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
+
    return GL_FALSE;		/* finished the pipe */
 }
 
@@ -461,7 +529,7 @@ const struct tnl_pipeline_stage _radeon_tcl_stage =
 
 static void transition_to_swtnl( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    GLuint se_cntl;
 
@@ -490,7 +558,7 @@ static void transition_to_swtnl( GLcontext *ctx )
 
 static void transition_to_hwtnl( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
 
@@ -509,17 +577,17 @@ static void transition_to_hwtnl( GLcontext *ctx )
 
    tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
 
-   if ( rmesa->dma.flush )			
-      rmesa->dma.flush( rmesa );	
+   if ( rmesa->radeon.dma.flush )			
+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	
 
-   rmesa->dma.flush = NULL;
+   rmesa->radeon.dma.flush = NULL;
    rmesa->swtcl.vertex_format = 0;
    
-   if (rmesa->swtcl.indexed_verts.buf) 
-      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
-			      __FUNCTION__ );
+   //   if (rmesa->swtcl.indexed_verts.buf) 
+   //      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
+   //			      __FUNCTION__ );
 
-   if (RADEON_DEBUG & DEBUG_FALLBACKS) 
+   if (RADEON_DEBUG & RADEON_FALLBACKS)
       fprintf(stderr, "Radeon end tcl fallback\n");
 }
 
@@ -550,22 +618,22 @@ static char *getFallbackString(GLuint bit)
 
 void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint oldfallback = rmesa->TclFallback;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint oldfallback = rmesa->radeon.TclFallback;
 
    if (mode) {
-      rmesa->TclFallback |= bit;
+      rmesa->radeon.TclFallback |= bit;
       if (oldfallback == 0) {
-	 if (RADEON_DEBUG & DEBUG_FALLBACKS) 
+	 if (RADEON_DEBUG & RADEON_FALLBACKS)
 	    fprintf(stderr, "Radeon begin tcl fallback %s\n",
 		    getFallbackString( bit ));
 	 transition_to_swtnl( ctx );
       }
    }
    else {
-      rmesa->TclFallback &= ~bit;
+      rmesa->radeon.TclFallback &= ~bit;
       if (oldfallback == bit) {
-	 if (RADEON_DEBUG & DEBUG_FALLBACKS) 
+	 if (RADEON_DEBUG & RADEON_FALLBACKS)
 	    fprintf(stderr, "Radeon end tcl fallback %s\n",
 		    getFallbackString( bit ));
 	 transition_to_hwtnl( ctx );
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
index f2b6deb9c04..99865fff27b 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/texobj.h"
 
 #include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
 #include "radeon_state.h"
 #include "radeon_ioctl.h"
 #include "radeon_swtcl.h"
@@ -170,10 +171,13 @@ static void radeonSetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
 {
    GLuint anisotropy = (t->pp_txfilter & RADEON_MAX_ANISO_MASK);
 
+   /* Force revalidation to account for switches from/to mipmapping. */
+   t->validated = GL_FALSE;
+
    t->pp_txfilter &= ~(RADEON_MIN_FILTER_MASK | RADEON_MAG_FILTER_MASK);
 
    /* r100 chips can't handle mipmaps/aniso for cubemap/volume textures */
-   if ( t->base.tObj->Target == GL_TEXTURE_CUBE_MAP ) {
+   if ( t->base.Target == GL_TEXTURE_CUBE_MAP ) {
       switch ( minf ) {
       case GL_NEAREST:
       case GL_NEAREST_MIPMAP_NEAREST:
@@ -249,437 +253,17 @@ static void radeonSetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] )
    t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
 }
 
-
-/**
- * Allocate space for and load the mesa images into the texture memory block.
- * This will happen before drawing with a new texture, or drawing with a
- * texture after it was swapped out or teximaged again.
- */
-
-static radeonTexObjPtr radeonAllocTexObj( struct gl_texture_object *texObj )
-{
-   radeonTexObjPtr t;
-
-   t = CALLOC_STRUCT( radeon_tex_obj );
-   texObj->DriverData = t;
-   if ( t != NULL ) {
-      if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
-	 fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, (void *)t );
-      }
-
-      /* Initialize non-image-dependent parts of the state:
-       */
-      t->base.tObj = texObj;
-      t->border_fallback = GL_FALSE;
-
-      t->pp_txfilter = RADEON_BORDER_MODE_OGL;
-      t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
-			RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
-
-      make_empty_list( & t->base );
-
-      radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT );
-      radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
-      radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
-      radeonSetTexBorderColor( t, texObj->BorderColor );
-   }
-
-   return t;
-}
-
-
-static const struct gl_texture_format *
-radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
-                           GLenum format, GLenum type )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   const GLboolean do32bpt =
-       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 );
-   const GLboolean force16bpt =
-       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 );
-   (void) format;
-
-   switch ( internalFormat ) {
-   case 4:
-   case GL_RGBA:
-   case GL_COMPRESSED_RGBA:
-      switch ( type ) {
-      case GL_UNSIGNED_INT_10_10_10_2:
-      case GL_UNSIGNED_INT_2_10_10_10_REV:
-	 return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555;
-      case GL_UNSIGNED_SHORT_4_4_4_4:
-      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
-	 return _dri_texformat_argb4444;
-      case GL_UNSIGNED_SHORT_5_5_5_1:
-      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
-	 return _dri_texformat_argb1555;
-      default:
-         return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb4444;
-      }
-
-   case 3:
-   case GL_RGB:
-   case GL_COMPRESSED_RGB:
-      switch ( type ) {
-      case GL_UNSIGNED_SHORT_4_4_4_4:
-      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
-	 return _dri_texformat_argb4444;
-      case GL_UNSIGNED_SHORT_5_5_5_1:
-      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
-	 return _dri_texformat_argb1555;
-      case GL_UNSIGNED_SHORT_5_6_5:
-      case GL_UNSIGNED_SHORT_5_6_5_REV:
-	 return _dri_texformat_rgb565;
-      default:
-         return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
-      }
-
-   case GL_RGBA8:
-   case GL_RGB10_A2:
-   case GL_RGBA12:
-   case GL_RGBA16:
-      return !force16bpt ?
-	  _dri_texformat_argb8888 : _dri_texformat_argb4444;
-
-   case GL_RGBA4:
-   case GL_RGBA2:
-      return _dri_texformat_argb4444;
-
-   case GL_RGB5_A1:
-      return _dri_texformat_argb1555;
-
-   case GL_RGB8:
-   case GL_RGB10:
-   case GL_RGB12:
-   case GL_RGB16:
-      return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
-
-   case GL_RGB5:
-   case GL_RGB4:
-   case GL_R3_G3_B2:
-      return _dri_texformat_rgb565;
-
-   case GL_ALPHA:
-   case GL_ALPHA4:
-   case GL_ALPHA8:
-   case GL_ALPHA12:
-   case GL_ALPHA16:
-   case GL_COMPRESSED_ALPHA:
-      return _dri_texformat_a8;
-
-   case 1:
-   case GL_LUMINANCE:
-   case GL_LUMINANCE4:
-   case GL_LUMINANCE8:
-   case GL_LUMINANCE12:
-   case GL_LUMINANCE16:
-   case GL_COMPRESSED_LUMINANCE:
-      return _dri_texformat_l8;
-
-   case 2:
-   case GL_LUMINANCE_ALPHA:
-   case GL_LUMINANCE4_ALPHA4:
-   case GL_LUMINANCE6_ALPHA2:
-   case GL_LUMINANCE8_ALPHA8:
-   case GL_LUMINANCE12_ALPHA4:
-   case GL_LUMINANCE12_ALPHA12:
-   case GL_LUMINANCE16_ALPHA16:
-   case GL_COMPRESSED_LUMINANCE_ALPHA:
-      return _dri_texformat_al88;
-
-   case GL_INTENSITY:
-   case GL_INTENSITY4:
-   case GL_INTENSITY8:
-   case GL_INTENSITY12:
-   case GL_INTENSITY16:
-   case GL_COMPRESSED_INTENSITY:
-      return _dri_texformat_i8;
-
-   case GL_YCBCR_MESA:
-      if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
-          type == GL_UNSIGNED_BYTE)
-         return &_mesa_texformat_ycbcr;
-      else
-         return &_mesa_texformat_ycbcr_rev;
-
-   case GL_RGB_S3TC:
-   case GL_RGB4_S3TC:
-   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
-      return &_mesa_texformat_rgb_dxt1;
-
-   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
-      return &_mesa_texformat_rgba_dxt1;
-
-   case GL_RGBA_S3TC:
-   case GL_RGBA4_S3TC:
-   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
-      return &_mesa_texformat_rgba_dxt3;
-
-   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
-      return &_mesa_texformat_rgba_dxt5;
-
-   default:
-      _mesa_problem(ctx, "unexpected texture format in %s", __FUNCTION__);
-      return NULL;
-   }
-
-   return NULL; /* never get here */
-}
-
-
-static void radeonTexImage1D( GLcontext *ctx, GLenum target, GLint level,
-                              GLint internalFormat,
-                              GLint width, GLint border,
-                              GLenum format, GLenum type, const GLvoid *pixels,
-                              const struct gl_pixelstore_attrib *packing,
-                              struct gl_texture_object *texObj,
-                              struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
-   if ( t ) {
-      driSwapOutTextureObject( t );
-   }
-   else {
-      t = (driTextureObject *) radeonAllocTexObj( texObj );
-      if (!t) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
-         return;
-      }
-   }
-
-   /* Note, this will call ChooseTextureFormat */
-   _mesa_store_teximage1d(ctx, target, level, internalFormat,
-                          width, border, format, type, pixels,
-                          &ctx->Unpack, texObj, texImage);
-
-   t->dirty_images[0] |= (1 << level);
-}
-
-
-static void radeonTexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
-                                 GLint xoffset,
-                                 GLsizei width,
-                                 GLenum format, GLenum type,
-                                 const GLvoid *pixels,
-                                 const struct gl_pixelstore_attrib *packing,
-                                 struct gl_texture_object *texObj,
-                                 struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
-   assert( t ); /* this _should_ be true */
-   if ( t ) {
-      driSwapOutTextureObject( t );
-   }
-   else {
-      t = (driTextureObject *) radeonAllocTexObj( texObj );
-      if (!t) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
-         return;
-      }
-   }
-
-   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
-			     format, type, pixels, packing, texObj,
-			     texImage);
-
-   t->dirty_images[0] |= (1 << level);
-}
-
-
-static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level,
-                              GLint internalFormat,
-                              GLint width, GLint height, GLint border,
-                              GLenum format, GLenum type, const GLvoid *pixels,
-                              const struct gl_pixelstore_attrib *packing,
-                              struct gl_texture_object *texObj,
-                              struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-   GLuint face;
-
-   /* which cube face or ordinary 2D image */
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
-      ASSERT(face < 6);
-      break;
-   default:
-      face = 0;
-   }
-
-   if ( t != NULL ) {
-      driSwapOutTextureObject( t );
-   }
-   else {
-      t = (driTextureObject *) radeonAllocTexObj( texObj );
-      if (!t) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
-         return;
-      }
-   }
-
-   /* Note, this will call ChooseTextureFormat */
-   _mesa_store_teximage2d(ctx, target, level, internalFormat,
-                          width, height, border, format, type, pixels,
-                          &ctx->Unpack, texObj, texImage);
-
-   t->dirty_images[face] |= (1 << level);
-}
-
-
-static void radeonTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
-                                 GLint xoffset, GLint yoffset,
-                                 GLsizei width, GLsizei height,
-                                 GLenum format, GLenum type,
-                                 const GLvoid *pixels,
-                                 const struct gl_pixelstore_attrib *packing,
-                                 struct gl_texture_object *texObj,
-                                 struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-   GLuint face;
-
-   /* which cube face or ordinary 2D image */
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
-      ASSERT(face < 6);
-      break;
-   default:
-      face = 0;
-   }
-
-   assert( t ); /* this _should_ be true */
-   if ( t ) {
-      driSwapOutTextureObject( t );
-   }
-   else {
-      t = (driTextureObject *) radeonAllocTexObj( texObj );
-      if (!t) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
-         return;
-      }
-   }
-
-   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
-			     height, format, type, pixels, packing, texObj,
-			     texImage);
-
-   t->dirty_images[face] |= (1 << level);
-}
-
-static void radeonCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
-                              GLint internalFormat,
-                              GLint width, GLint height, GLint border,
-                              GLsizei imageSize, const GLvoid *data,
-                              struct gl_texture_object *texObj,
-                              struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-   GLuint face;
-
-   /* which cube face or ordinary 2D image */
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
-      ASSERT(face < 6);
-      break;
-   default:
-      face = 0;
-   }
-
-   if ( t != NULL ) {
-      driSwapOutTextureObject( t );
-   }
-   else {
-      t = (driTextureObject *) radeonAllocTexObj( texObj );
-      if (!t) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
-         return;
-      }
-   }
-
-   /* Note, this will call ChooseTextureFormat */
-   _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
-                                 height, border, imageSize, data, texObj, texImage);
-
-   t->dirty_images[face] |= (1 << level);
-}
-
-
-static void radeonCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
-                                 GLint xoffset, GLint yoffset,
-                                 GLsizei width, GLsizei height,
-                                 GLenum format,
-                                 GLsizei imageSize, const GLvoid *data,
-                                 struct gl_texture_object *texObj,
-                                 struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-   GLuint face;
-
-
-   /* which cube face or ordinary 2D image */
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
-      ASSERT(face < 6);
-      break;
-   default:
-      face = 0;
-   }
-
-   assert( t ); /* this _should_ be true */
-   if ( t ) {
-      driSwapOutTextureObject( t );
-   }
-   else {
-      t = (driTextureObject *) radeonAllocTexObj( texObj );
-      if (!t) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D");
-         return;
-      }
-   }
-
-   _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
-                                 height, format, imageSize, data, texObj, texImage);
-
-   t->dirty_images[face] |= (1 << level);
-}
-
 #define SCALED_FLOAT_TO_BYTE( x, scale ) \
 		(((GLuint)((255.0F / scale) * (x))) / 2)
 
 static void radeonTexEnv( GLcontext *ctx, GLenum target,
 			  GLenum pname, const GLfloat *param )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLuint unit = ctx->Texture.CurrentUnit;
    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 
-   if ( RADEON_DEBUG & DEBUG_STATE ) {
+   if ( RADEON_DEBUG & RADEON_STATE ) {
       fprintf( stderr, "%s( %s )\n",
 	       __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
    }
@@ -706,7 +290,7 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target,
        * functions, one mapping [-1.0,0.0] to [-128,0] and one mapping
        * [0.0,4.0] to [0,127].
        */
-      min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ?
+      min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ?
 	  0.0 : -1.0;
       bias = CLAMP( *param, min, 4.0 );
       if ( bias == 0 ) {
@@ -739,12 +323,10 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
 				struct gl_texture_object *texObj,
 				GLenum pname, const GLfloat *params )
 {
-   radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
+   radeonTexObj* t = radeon_tex_obj(texObj);
 
-   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
-      fprintf( stderr, "%s( %s )\n", __FUNCTION__,
+   radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, "%s( %s )\n", __FUNCTION__,
 	       _mesa_lookup_enum_by_nr( pname ) );
-   }
 
    switch ( pname ) {
    case GL_TEXTURE_MIN_FILTER:
@@ -767,57 +349,50 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
    case GL_TEXTURE_MAX_LEVEL:
    case GL_TEXTURE_MIN_LOD:
    case GL_TEXTURE_MAX_LOD:
+
       /* This isn't the most efficient solution but there doesn't appear to
        * be a nice alternative.  Since there's no LOD clamping,
        * we just have to rely on loading the right subset of mipmap levels
        * to simulate a clamped LOD.
        */
-      driSwapOutTextureObject( (driTextureObject *) t );
+      if (t->mt) {
+         radeon_miptree_unreference(t->mt);
+	 t->mt = 0;
+	 t->validated = GL_FALSE;
+      }
       break;
 
    default:
       return;
    }
-
-   /* Mark this texobj as dirty (one bit per tex unit)
-    */
-   t->dirty_state = TEX_ALL;
-}
-
-
-static void radeonBindTexture( GLcontext *ctx, GLenum target,
-			       struct gl_texture_object *texObj )
-{
-   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
-      fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj,
-	       ctx->Texture.CurrentUnit );
-   }
-
-   assert( (target != GL_TEXTURE_1D && target != GL_TEXTURE_2D &&
-            target != GL_TEXTURE_RECTANGLE_NV && target != GL_TEXTURE_CUBE_MAP) ||
-           (texObj->DriverData != NULL) );
 }
 
-
 static void radeonDeleteTexture( GLcontext *ctx,
 				 struct gl_texture_object *texObj )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   radeonTexObj* t = radeon_tex_obj(texObj);
+   int i;
 
-   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
-      fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
+   radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
+	 "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
 	       _mesa_lookup_enum_by_nr( texObj->Target ) );
-   }
-
-   if ( t != NULL ) {
-      if ( rmesa ) {
-         RADEON_FIREVERTICES( rmesa );
-      }
 
-      driDestroyTextureObject( t );
+   if ( rmesa ) {
+     radeon_firevertices(&rmesa->radeon);
+     for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) {
+       if ( t == rmesa->state.texture.unit[i].texobj ) {
+	 rmesa->state.texture.unit[i].texobj = NULL;
+	 rmesa->hw.tex[i].dirty = GL_FALSE;
+	 rmesa->hw.cube[i].dirty = GL_FALSE;
+       }
+     }
    }
 
+   if (t->mt) {
+      radeon_miptree_unreference(t->mt);
+      t->mt = 0;
+   }
    /* Free mipmap images and the texture object itself */
    _mesa_delete_texture_object(ctx, texObj);
 }
@@ -837,7 +412,7 @@ static void radeonTexGen( GLcontext *ctx,
 			  GLenum pname,
 			  const GLfloat *params )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLuint unit = ctx->Texture.CurrentUnit;
    rmesa->recheck_texgen[unit] = GL_TRUE;
 }
@@ -851,29 +426,40 @@ static void radeonTexGen( GLcontext *ctx,
 static struct gl_texture_object *
 radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   struct gl_texture_object *obj;
-   obj = _mesa_new_texture_object(ctx, name, target);
-   if (!obj)
-      return NULL;
-   obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
-   radeonAllocTexObj( obj );
-   return obj;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
+
+   _mesa_initialize_texture_object(&t->base, name, target);
+   t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
+
+   t->border_fallback = GL_FALSE;
+
+   t->pp_txfilter = RADEON_BORDER_MODE_OGL;
+   t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
+		     RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
+   
+   radeonSetTexWrap( t, t->base.WrapS, t->base.WrapT );
+   radeonSetTexMaxAnisotropy( t, t->base.MaxAnisotropy );
+   radeonSetTexFilter( t, t->base.MinFilter, t->base.MagFilter );
+   radeonSetTexBorderColor( t, t->base.BorderColor );
+   return &t->base;
 }
 
 
+
 void radeonInitTextureFuncs( struct dd_function_table *functions )
 {
-   functions->ChooseTextureFormat	= radeonChooseTextureFormat;
+   functions->ChooseTextureFormat	= radeonChooseTextureFormat_mesa;
    functions->TexImage1D		= radeonTexImage1D;
    functions->TexImage2D		= radeonTexImage2D;
    functions->TexSubImage1D		= radeonTexSubImage1D;
    functions->TexSubImage2D		= radeonTexSubImage2D;
+   functions->GetTexImage               = radeonGetTexImage;
+   functions->GetCompressedTexImage     = radeonGetCompressedTexImage;
 
    functions->NewTextureObject		= radeonNewTextureObject;
-   functions->BindTexture		= radeonBindTexture;
+   //   functions->BindTexture		= radeonBindTexture;
    functions->DeleteTexture		= radeonDeleteTexture;
-   functions->IsTextureResident		= driIsTextureResident;
 
    functions->TexEnv			= radeonTexEnv;
    functions->TexParameter		= radeonTexParameter;
@@ -882,5 +468,12 @@ void radeonInitTextureFuncs( struct dd_function_table *functions )
    functions->CompressedTexImage2D	= radeonCompressedTexImage2D;
    functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
 
+   functions->GenerateMipmap = radeonGenerateMipmap;
+
+   functions->NewTextureImage = radeonNewTextureImage;
+   functions->FreeTexImageData = radeonFreeTexImageData;
+   functions->MapTexture = radeonMapTexture;
+   functions->UnmapTexture = radeonUnmapTexture;
+
    driInitTextureFormats();
 }
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h
index 80008808289..a4aaddc74fa 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.h
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.h
@@ -41,12 +41,16 @@ extern void radeonSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
                                unsigned long long offset, GLint depth,
                                GLuint pitch);
 
+extern void radeonSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv);
+extern void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format,
+			       __DRIdrawable *dPriv);
+
 extern void radeonUpdateTextureState( GLcontext *ctx );
 
-extern int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t,
+extern int radeonUploadTexImages( r100ContextPtr rmesa, radeonTexObjPtr t,
 				  GLuint face );
 
-extern void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t );
+extern void radeonDestroyTexObj( r100ContextPtr rmesa, radeonTexObjPtr t );
 
 extern void radeonInitTextureFuncs( struct dd_function_table *functions );
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_texmem.c b/src/mesa/drivers/dri/radeon/radeon_texmem.c
deleted file mode 100644
index 5f7bbe6a4cb..00000000000
--- a/src/mesa/drivers/dri/radeon/radeon_texmem.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/**************************************************************************
-
-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
-                     VA Linux Systems Inc., Fremont, California.
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation on the rights to use, copy, modify, merge, publish,
-distribute, sub license, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial
-portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
-SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- *   Kevin E. Martin <martin@valinux.com>
- *   Gareth Hughes <gareth@valinux.com>
- *
- */
-#include <errno.h> 
-
-#include "main/glheader.h"
-#include "main/imports.h"
-#include "main/context.h"
-#include "main/macros.h"
-
-#include "radeon_context.h"
-#include "radeon_ioctl.h"
-#include "radeon_tex.h"
-
-#include <unistd.h>  /* for usleep() */
-
-
-/**
- * Destroy any device-dependent state associated with the texture.  This may
- * include NULLing out hardware state that points to the texture.
- */
-void
-radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
-{
-   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
-      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)t, (void *)t->base.tObj );
-   }
-
-   if ( rmesa != NULL ) {
-      unsigned   i;
-
-
-      for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) {
-	 if ( t == rmesa->state.texture.unit[i].texobj ) {
-	    rmesa->state.texture.unit[i].texobj = NULL;
-	 }
-      }
-   }
-}
-
-
-/* ------------------------------------------------------------
- * Texture image conversions
- */
-
-
-static void radeonUploadRectSubImage( radeonContextPtr rmesa,
-				      radeonTexObjPtr t, 
-				      struct gl_texture_image *texImage,
-				      GLint x, GLint y, 
-				      GLint width, GLint height )
-{
-   const struct gl_texture_format *texFormat = texImage->TexFormat;
-   int blit_format, dstPitch, done;
-
-   switch ( texFormat->TexelBytes ) {
-   case 1:
-      blit_format = RADEON_GMC_DST_8BPP_CI;
-      break;
-   case 2:
-      blit_format = RADEON_GMC_DST_16BPP;
-      break;
-   case 4:
-      blit_format = RADEON_GMC_DST_32BPP;
-      break;
-   default:
-      fprintf( stderr, "radeonUploadRectSubImage: unknown blit_format (texelbytes=%d)\n", 
-      	       texFormat->TexelBytes);
-      return;
-   }
-
-   t->image[0][0].data = texImage->Data;
-
-   /* Currently don't need to cope with small pitches.
-    */
-   width = texImage->Width;
-   height = texImage->Height;
-   dstPitch = t->pp_txpitch + 32;
-
-   {	/* FIXME: prefer GART-texturing if possible */
-      /* Data not in GART memory, or bad pitch.
-       */
-      for (done = 0; done < height ; ) {
-	 struct radeon_dma_region region;
-	 int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch );
-	 int src_pitch;
-	 char *tex;
-
-         src_pitch = texImage->RowStride * texFormat->TexelBytes;
-
-	 tex = (char *)texImage->Data + done * src_pitch;
-
-	 memset(&region, 0, sizeof(region));
-	 radeonAllocDmaRegion( rmesa, &region, lines * dstPitch, 1024 );
-
-	 /* Copy texdata to dma:
-	  */
-	 if (0)
-	    fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n",
-		    __FUNCTION__, src_pitch, dstPitch);
-
-	 if (src_pitch == dstPitch) {
-	    memcpy( region.address + region.start, tex, lines * src_pitch );
-	 } 
-	 else {
-	    char *buf = region.address + region.start;
-	    int i;
-	    for (i = 0 ; i < lines ; i++) {
-	       memcpy( buf, tex, src_pitch );
-	       buf += dstPitch;
-	       tex += src_pitch;
-	    }
-	 }
-
-	 radeonEmitWait( rmesa, RADEON_WAIT_3D );
-
-	 
-
-	 /* Blit to framebuffer
-	  */
-	 radeonEmitBlit( rmesa,
-		       blit_format,
-		       dstPitch, GET_START( &region ),
-		       dstPitch, t->bufAddr,
-		       0, 0,
-		       0, done,
-		       width, lines );
-	 
-	 radeonEmitWait( rmesa, RADEON_WAIT_2D );
-
-	 radeonReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
-	 done += lines;
-      }
-   }
-}
-
-
-/**
- * Upload the texture image associated with texture \a t at the specified
- * level at the address relative to \a start.
- */
-static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t, 
-			    GLint hwlevel,
-			    GLint x, GLint y, GLint width, GLint height,
-			    GLuint face )
-{
-   struct gl_texture_image *texImage = NULL;
-   GLuint offset;
-   GLint imageWidth, imageHeight;
-   GLint ret;
-   drm_radeon_texture_t tex;
-   drm_radeon_tex_image_t tmp;
-   const int level = hwlevel + t->base.firstLevel;
-
-   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
-      fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", 
-	       __FUNCTION__, (void *)t, (void *)t->base.tObj, level, width, height, face );
-   }
-
-   ASSERT(face < 6);
-
-   /* Ensure we have a valid texture to upload */
-   if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
-      _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
-      return;
-   }
-
-   texImage = t->base.tObj->Image[face][level];
-
-   if ( !texImage ) {
-      if ( RADEON_DEBUG & DEBUG_TEXTURE )
-	 fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
-      return;
-   }
-   if ( !texImage->Data ) {
-      if ( RADEON_DEBUG & DEBUG_TEXTURE )
-	 fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
-      return;
-   }
-
-
-   if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
-      assert(level == 0);
-      assert(hwlevel == 0);
-      if ( RADEON_DEBUG & DEBUG_TEXTURE )
-	 fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
-      radeonUploadRectSubImage( rmesa, t, texImage, x, y, width, height );
-      return;
-   }
-
-   imageWidth = texImage->Width;
-   imageHeight = texImage->Height;
-
-   offset = t->bufAddr + t->base.totalSize * face / 6;
-
-   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
-      GLint imageX = 0;
-      GLint imageY = 0;
-      GLint blitX = t->image[face][hwlevel].x;
-      GLint blitY = t->image[face][hwlevel].y;
-      GLint blitWidth = t->image[face][hwlevel].width;
-      GLint blitHeight = t->image[face][hwlevel].height;
-      fprintf( stderr, "   upload image: %d,%d at %d,%d\n",
-	       imageWidth, imageHeight, imageX, imageY );
-      fprintf( stderr, "   upload  blit: %d,%d at %d,%d\n",
-	       blitWidth, blitHeight, blitX, blitY );
-      fprintf( stderr, "       blit ofs: 0x%07x level: %d/%d\n",
-	       (GLuint)offset, hwlevel, level );
-   }
-
-   t->image[face][hwlevel].data = texImage->Data;
-
-   /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
-    * NOTE: we're always use a 1KB-wide blit and I8 texture format.
-    * We used to use 1, 2 and 4-byte texels and used to use the texture
-    * width to dictate the blit width - but that won't work for compressed
-    * textures. (Brian)
-    * NOTE: can't do that with texture tiling. (sroland)
-    */
-   tex.offset = offset;
-   tex.image = &tmp;
-   /* copy (x,y,width,height,data) */
-   memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) );
-
-   if (texImage->TexFormat->TexelBytes) {
-      /* use multi-byte upload scheme */
-      tex.height = imageHeight;
-      tex.width = imageWidth;
-      tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK;
-      tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
-      tex.offset += tmp.x & ~1023;
-      tmp.x = tmp.x % 1024;
-      if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
-	 /* need something like "tiled coordinates" ? */
-	 tmp.y = tmp.x / (tex.pitch * 128) * 2;
-	 tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
-	 tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
-      }
-      else {
-	 tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
-      }
-      if ((t->tile_bits & RADEON_TXO_MACRO_TILE) &&
-	 (texImage->Width * texImage->TexFormat->TexelBytes >= 256)) {
-	 /* radeon switches off macro tiling for small textures/mipmaps it seems */
-	 tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
-      }
-   }
-   else {
-      /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
-         needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
-      /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
-         so the kernel module reads the right amount of data. */
-      tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
-      tex.pitch = (BLIT_WIDTH_BYTES / 64);
-      tex.height = (imageHeight + 3) / 4;
-      tex.width = (imageWidth + 3) / 4;
-      switch (t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) {
-      case RADEON_TXFORMAT_DXT1:
-         tex.width *= 8;
-         break;
-      case RADEON_TXFORMAT_DXT23:
-      case RADEON_TXFORMAT_DXT45:
-         tex.width *= 16;
-         break;
-      }
-   }
-
-   LOCK_HARDWARE( rmesa );
-   do {
-      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
-                                 &tex, sizeof(drm_radeon_texture_t) );
-   } while ( ret == -EAGAIN );
-
-   UNLOCK_HARDWARE( rmesa );
-
-   if ( ret ) {
-      fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
-      fprintf( stderr, "   offset=0x%08x\n",
-	       offset );
-      fprintf( stderr, "   image width=%d height=%d\n",
-	       imageWidth, imageHeight );
-      fprintf( stderr, "    blit width=%d height=%d data=%p\n",
-	       t->image[face][hwlevel].width, t->image[face][hwlevel].height,
-	       t->image[face][hwlevel].data );
-      exit( 1 );
-   }
-}
-
-
-/**
- * Upload the texture images associated with texture \a t.  This might
- * require the allocation of texture memory.
- * 
- * \param rmesa Context pointer
- * \param t Texture to be uploaded
- * \param face Cube map face to be uploaded.  Zero for non-cube maps.
- */
-
-int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint face )
-{
-   int numLevels;
-
-   if ( !t || t->base.totalSize == 0 || t->image_override )
-      return 0;
-
-   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
-      fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
-	       (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize,
-	       t->base.firstLevel, t->base.lastLevel );
-   }
-
-   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-
-   if (RADEON_DEBUG & DEBUG_SYNC) {
-      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
-      radeonFinish( rmesa->glCtx );
-   }
-
-   LOCK_HARDWARE( rmesa );
-
-   if ( t->base.memBlock == NULL ) {
-      int heap;
-
-      heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps,
-				 (driTextureObject *) t );
-      if ( heap == -1 ) {
-	 UNLOCK_HARDWARE( rmesa );
-	 return -1;
-      }
-
-      /* Set the base offset of the texture image */
-      t->bufAddr = rmesa->radeonScreen->texOffset[heap] 
-	   + t->base.memBlock->ofs;
-      t->pp_txoffset = t->bufAddr;
-
-      if (!(t->base.tObj->Image[0][0]->IsClientData)) {
-	 /* hope it's safe to add that here... */
-	 t->pp_txoffset |= t->tile_bits;
-      }
-
-      /* Mark this texobj as dirty on all units:
-       */
-      t->dirty_state = TEX_ALL;
-   }
-
-
-   /* Let the world know we've used this memory recently.
-    */
-   driUpdateTextureLRU( (driTextureObject *) t );
-   UNLOCK_HARDWARE( rmesa );
-
-
-   /* Upload any images that are new */
-   if (t->base.dirty_images[face]) {
-      int i;
-      for ( i = 0 ; i < numLevels ; i++ ) {
-         if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) {
-            uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width,
-			    t->image[face][i].height, face );
-         }
-      }
-      t->base.dirty_images[face] = 0;
-   }
-
-   if (RADEON_DEBUG & DEBUG_SYNC) {
-      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
-      radeonFinish( rmesa->glCtx );
-   }
-
-   return 0;
-}
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index b165205c093..9d252aa74c5 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -39,10 +39,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/context.h"
 #include "main/macros.h"
 #include "main/texformat.h"
+#include "main/teximage.h"
 #include "main/texobj.h"
 #include "main/enums.h"
 
 #include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
 #include "radeon_state.h"
 #include "radeon_ioctl.h"
 #include "radeon_swtcl.h"
@@ -75,10 +77,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
 			     && (tx_table[f].format != 0xffffffff) )
 
-static const struct {
+struct tx_table {
    GLuint format, filter;
-}
-tx_table[] =
+};
+
+static const struct tx_table tx_table[] =
 {
    _ALPHA(RGBA8888),
    _ALPHA_REV(RGBA8888),
@@ -111,252 +114,6 @@ tx_table[] =
 #undef _ALPHA
 #undef _INVALID
 
-/**
- * This function computes the number of bytes of storage needed for
- * the given texture object (all mipmap levels, all cube faces).
- * The \c image[face][level].x/y/width/height parameters for upload/blitting
- * are computed here.  \c pp_txfilter, \c pp_txformat, etc. will be set here
- * too.
- * 
- * \param rmesa Context pointer
- * \param tObj GL texture object whose images are to be posted to
- *                 hardware state.
- */
-static void radeonSetTexImages( radeonContextPtr rmesa,
-				struct gl_texture_object *tObj )
-{
-   radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
-   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
-   GLint curOffset, blitWidth;
-   GLint i, texelBytes;
-   GLint numLevels;
-   GLint log2Width, log2Height, log2Depth;
-
-   /* Set the hardware texture format
-    */
-   if ( !t->image_override ) {
-      t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
-                          RADEON_TXFORMAT_ALPHA_IN_MAP);
-      t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
-
-      if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
-         t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format;
-         t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter;
-      }
-      else {
-         _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
-         return;
-      }
-   }
-
-   texelBytes = baseImage->TexFormat->TexelBytes;
-
-   /* Compute which mipmap levels we really want to send to the hardware.
-    */
-
-   if (tObj->Target != GL_TEXTURE_CUBE_MAP)
-      driCalculateTextureFirstLastLevel( (driTextureObject *) t );
-   else {
-      /* r100 can't handle mipmaps for cube/3d textures, so don't waste
-         memory for them */
-      t->base.firstLevel = t->base.lastLevel = tObj->BaseLevel;
-   }
-   log2Width  = tObj->Image[0][t->base.firstLevel]->WidthLog2;
-   log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
-   log2Depth  = tObj->Image[0][t->base.firstLevel]->DepthLog2;
-
-   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-
-   assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
-
-   /* Calculate mipmap offsets and dimensions for blitting (uploading)
-    * The idea is that we lay out the mipmap levels within a block of
-    * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
-    */
-   curOffset = 0;
-   blitWidth = BLIT_WIDTH_BYTES;
-   t->tile_bits = 0;
-
-   /* figure out if this texture is suitable for tiling. */
-   if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) {
-      if (rmesa->texmicrotile && (baseImage->Height > 1)) {
-	 /* allow 32 (bytes) x 1 mip (which will use two times the space
-	    the non-tiled version would use) max if base texture is large enough */
-	 if ((numLevels == 1) ||
-	   (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
-	       (baseImage->Width * texelBytes > 64)) ||
-	    ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
-	    /* R100 has two microtile bits (only the txoffset reg, not the blitter)
-	       weird: X2 + OPT: 32bit correct, 16bit completely hosed
-		      X2: 32bit correct, 16bit correct
-		      OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */
-	    t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/;
-	 }
-      }
-      if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) {
-	 /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not
-	    in the case if height is smaller than 16 (not 100% sure), as does the r200,
-	    so need to disable macro tiling in that case */
-	 if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) {
-	    t->tile_bits |= RADEON_TXO_MACRO_TILE;
-	 }
-      }
-   }
-
-   for (i = 0; i < numLevels; i++) {
-      const struct gl_texture_image *texImage;
-      GLuint size;
-
-      texImage = tObj->Image[0][i + t->base.firstLevel];
-      if ( !texImage )
-	 break;
-
-      /* find image size in bytes */
-      if (texImage->IsCompressed) {
-      /* need to calculate the size AFTER padding even though the texture is
-         submitted without padding.
-         Only handle pot textures currently - don't know if npot is even possible,
-         size calculation would certainly need (trivial) adjustments.
-         Align (and later pad) to 32byte, not sure what that 64byte blit width is
-         good for? */
-         if ((t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) == RADEON_TXFORMAT_DXT1) {
-            /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
-            if ((texImage->Width + 3) < 8) /* width one block */
-               size = texImage->CompressedSize * 4;
-            else if ((texImage->Width + 3) < 16)
-               size = texImage->CompressedSize * 2;
-            else size = texImage->CompressedSize;
-         }
-         else /* DXT3/5, 16 bytes per block */
-            if ((texImage->Width + 3) < 8)
-               size = texImage->CompressedSize * 2;
-            else size = texImage->CompressedSize;
-      }
-      else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
-	 size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
-      }
-      else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
-	 /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
-	    though the actual offset may be different (if texture is less than
-	    32 bytes width) to the untiled case */
-	 int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
-	 size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
-	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
-      }
-      else {
-	 int w = (texImage->Width * texelBytes + 31) & ~31;
-	 size = w * texImage->Height * texImage->Depth;
-	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
-      }
-      assert(size > 0);
-
-      /* Align to 32-byte offset.  It is faster to do this unconditionally
-       * (no branch penalty).
-       */
-
-      curOffset = (curOffset + 0x1f) & ~0x1f;
-
-      if (texelBytes) {
-	 t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
-	 t->image[0][i].y = 0;
-	 t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
-	 t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
-      }
-      else {
-         t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
-         t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
-         t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
-         t->image[0][i].height = size / t->image[0][i].width;     
-      }
-
-#if 0
-      /* for debugging only and only  applicable to non-rectangle targets */
-      assert(size % t->image[0][i].width == 0);
-      assert(t->image[0][i].x == 0
-             || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
-#endif
-
-      if (0)
-         fprintf(stderr,
-                 "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
-                 i, texImage->Width, texImage->Height,
-                 t->image[0][i].x, t->image[0][i].y,
-                 t->image[0][i].width, t->image[0][i].height, size, curOffset);
-
-      curOffset += size;
-
-   }
-
-   /* Align the total size of texture memory block.
-    */
-   t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
-
-   /* Setup remaining cube face blits, if needed */
-   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
-      const GLuint faceSize = t->base.totalSize;
-      GLuint face;
-      /* reuse face 0 x/y/width/height - just update the offset when uploading */
-      for (face = 1; face < 6; face++) {
-         for (i = 0; i < numLevels; i++) {
-            t->image[face][i].x =  t->image[0][i].x;
-            t->image[face][i].y =  t->image[0][i].y;
-            t->image[face][i].width  = t->image[0][i].width;
-            t->image[face][i].height = t->image[0][i].height;
-         }
-      }
-      t->base.totalSize = 6 * faceSize; /* total texmem needed */
-   }
-
-   /* Hardware state:
-    */
-   t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
-   t->pp_txfilter |= (numLevels - 1) << RADEON_MAX_MIP_LEVEL_SHIFT;
-
-   t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
-		       RADEON_TXFORMAT_HEIGHT_MASK |
-                       RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
-                       RADEON_TXFORMAT_F5_WIDTH_MASK |
-                       RADEON_TXFORMAT_F5_HEIGHT_MASK);
-   t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
-		      (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
-
-   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
-      assert(log2Width == log2Height);
-      t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
-                         (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
-                         (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
-      t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
-                           (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
-                           (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
-                           (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
-                           (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
-                           (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
-                           (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
-                           (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
-   }
-
-   t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
-                   ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
-
-   /* Only need to round to nearest 32 for textures, but the blitter
-    * requires 64-byte aligned pitches, and we may/may not need the
-    * blitter.   NPOT only!
-    */
-   if ( !t->image_override ) {
-      if (baseImage->IsCompressed)
-         t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
-      else
-         t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
-      t->pp_txpitch -= 32;
-   }
-
-   t->dirty_state = TEX_ALL;
-
-   /* FYI: radeonUploadTexImages( rmesa, t ); used to be called here */
-}
-
-
-
 /* ================================================================
  * Texture combine functions
  */
@@ -503,7 +260,7 @@ do {							\
 
 static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
    GLuint color_combine, alpha_combine;
    const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO
@@ -520,7 +277,7 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
    assert( (texUnit->_ReallyEnabled == 0)
 	   || (texUnit->_Current != NULL) );
 
-   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+   if ( RADEON_DEBUG & RADEON_TEXTURE ) {
       fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit );
    }
 
@@ -846,22 +603,21 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
 void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
                         unsigned long long offset, GLint depth, GLuint pitch)
 {
-	radeonContextPtr rmesa = pDRICtx->driverPrivate;
+	r100ContextPtr rmesa = pDRICtx->driverPrivate;
 	struct gl_texture_object *tObj =
-	    _mesa_lookup_texture(rmesa->glCtx, texname);
-	radeonTexObjPtr t;
+	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+	radeonTexObjPtr t = radeon_tex_obj(tObj);
 
 	if (tObj == NULL)
 		return;
 
-	t = (radeonTexObjPtr) tObj->DriverData;
-
 	t->image_override = GL_TRUE;
 
 	if (!offset)
 		return;
-
-	t->pp_txoffset = offset;
+	
+	t->bo = NULL;
+	t->override_offset = offset;
 	t->pp_txpitch = pitch - 32;
 
 	switch (depth) {
@@ -881,6 +637,125 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
 	}
 }
 
+void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format,
+			 __DRIdrawable *dPriv)
+{
+	struct gl_texture_unit *texUnit;
+	struct gl_texture_object *texObj;
+	struct gl_texture_image *texImage;
+	struct radeon_renderbuffer *rb;
+	radeon_texture_image *rImage;
+	radeonContextPtr radeon;
+	r100ContextPtr rmesa;
+	struct radeon_framebuffer *rfb;
+	radeonTexObjPtr t;
+	uint32_t pitch_val;
+	uint32_t internalFormat, type, format;
+
+	type = GL_BGRA;
+	format = GL_UNSIGNED_BYTE;
+	internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4);
+
+	radeon = pDRICtx->driverPrivate;
+	rmesa = pDRICtx->driverPrivate;
+
+	rfb = dPriv->driverPrivate;
+        texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
+	texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
+        texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
+
+	rImage = get_radeon_texture_image(texImage);
+	t = radeon_tex_obj(texObj);
+        if (t == NULL) {
+    	    return;
+    	}
+
+	radeon_update_renderbuffers(pDRICtx, dPriv);
+	/* back & depth buffer are useless free them right away */
+	rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer;
+	if (rb && rb->bo) {
+		radeon_bo_unref(rb->bo);
+        rb->bo = NULL;
+	}
+	rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+	if (rb && rb->bo) {
+		radeon_bo_unref(rb->bo);
+		rb->bo = NULL;
+	}
+	rb = rfb->color_rb[0];
+	if (rb->bo == NULL) {
+		/* Failed to BO for the buffer */
+		return;
+	}
+	
+	_mesa_lock_texture(radeon->glCtx, texObj);
+	if (t->bo) {
+		radeon_bo_unref(t->bo);
+		t->bo = NULL;
+	}
+	if (rImage->bo) {
+		radeon_bo_unref(rImage->bo);
+		rImage->bo = NULL;
+	}
+	if (t->mt) {
+		radeon_miptree_unreference(t->mt);
+		t->mt = NULL;
+	}
+	if (rImage->mt) {
+		radeon_miptree_unreference(rImage->mt);
+		rImage->mt = NULL;
+	}
+	_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
+				   rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
+	texImage->RowStride = rb->pitch / rb->cpp;
+	texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
+							internalFormat,
+							type, format, 0);
+	rImage->bo = rb->bo;
+	radeon_bo_ref(rImage->bo);
+	t->bo = rb->bo;
+	radeon_bo_ref(t->bo);
+	t->tile_bits = 0;
+	t->image_override = GL_TRUE;
+	t->override_offset = 0;
+	t->pp_txpitch &= (1 << 13) -1;
+	pitch_val = rb->pitch;
+	switch (rb->cpp) {
+	case 4:
+		if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT)
+			t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format;
+		else
+			t->pp_txformat = tx_table[MESA_FORMAT_ARGB8888].format;
+		t->pp_txfilter |= tx_table[MESA_FORMAT_ARGB8888].filter;
+		break;
+	case 3:
+	default:
+		t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format;
+		t->pp_txfilter |= tx_table[MESA_FORMAT_RGB888].filter;
+		break;
+	case 2:
+		t->pp_txformat = tx_table[MESA_FORMAT_RGB565].format;
+		t->pp_txfilter |= tx_table[MESA_FORMAT_RGB565].filter;
+		break;
+	}
+        t->pp_txsize = ((rb->base.Width - 1) << RADEON_TEX_USIZE_SHIFT)
+		   | ((rb->base.Height - 1) << RADEON_TEX_VSIZE_SHIFT);
+        t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
+	t->pp_txpitch = pitch_val;
+        t->pp_txpitch -= 32;
+
+	t->validated = GL_TRUE;
+	_mesa_unlock_texture(radeon->glCtx, texObj);
+	return;
+}
+
+
+void radeonSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
+{
+        radeonSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv);
+}
+
+
 #define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK |	\
 			      RADEON_MIN_FILTER_MASK | 		\
 			      RADEON_MAG_FILTER_MASK |		\
@@ -901,12 +776,53 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
                               RADEON_TXFORMAT_NON_POWER2)
 
 
-static void import_tex_obj_state( radeonContextPtr rmesa,
+static void disable_tex_obj_state( r100ContextPtr rmesa, 
+				   int unit )
+{
+   RADEON_STATECHANGE( rmesa, tex[unit] );
+
+   RADEON_STATECHANGE( rmesa, tcl );
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
+					     RADEON_Q_BIT(unit));
+   
+   if (rmesa->radeon.TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
+     TCL_FALLBACK( rmesa->radeon.glCtx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
+     rmesa->recheck_texgen[unit] = GL_TRUE;
+   }
+
+   if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
+     /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
+	cubic_map bit on unit 2 when the unit is disabled, otherwise every
+	2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
+	units, better be safe than sorry though).*/
+     RADEON_STATECHANGE( rmesa, tex[unit] );
+     rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
+   }
+
+   {
+      GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+      GLuint tmp = rmesa->TexGenEnabled;
+
+      rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
+      rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
+      rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
+      rmesa->TexGenNeedNormals[unit] = 0;
+      rmesa->TexGenEnabled |= 
+	(RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+
+      if (tmp != rmesa->TexGenEnabled) {
+	rmesa->recheck_texgen[unit] = GL_TRUE;
+	rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+      }
+   }
+}
+
+static void import_tex_obj_state( r100ContextPtr rmesa,
 				  int unit,
 				  radeonTexObjPtr texobj )
 {
 /* do not use RADEON_DB_STATE to avoid stale texture caches */
-   int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
+   uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
 
    RADEON_STATECHANGE( rmesa, tex[unit] );
@@ -915,10 +831,9 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
    cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
    cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
    cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
-   cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
    cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
 
-   if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+   if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) {
       GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
       txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */
       txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */
@@ -928,22 +843,12 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
    else {
       se_coord_fmt &= ~(RADEON_VTX_ST0_NONPARAMETRIC << unit);
 
-      if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
-	 int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
-	 GLuint bytesPerFace = texobj->base.totalSize / 6;
-	 ASSERT(texobj->base.totalSize % 6 == 0);
+      if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
+	 uint32_t *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
 
 	 RADEON_STATECHANGE( rmesa, cube[unit] );
 	 cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
-	 /* dont know if this setup conforms to OpenGL.. 
-	  * at least it matches the behavior of mesa software renderer
-	  */
-	 cube_cmd[CUBE_PP_CUBIC_OFFSET_0] = texobj->pp_txoffset; /* right */
-	 cube_cmd[CUBE_PP_CUBIC_OFFSET_1] = texobj->pp_txoffset + 1 * bytesPerFace; /* left */
-	 cube_cmd[CUBE_PP_CUBIC_OFFSET_2] = texobj->pp_txoffset + 2 * bytesPerFace; /* top */
-	 cube_cmd[CUBE_PP_CUBIC_OFFSET_3] = texobj->pp_txoffset + 3 * bytesPerFace; /* bottom */
-	 cube_cmd[CUBE_PP_CUBIC_OFFSET_4] = texobj->pp_txoffset + 4 * bytesPerFace; /* front */
-	 cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset + 5 * bytesPerFace; /* back */
+	 /* state filled out in the cube_emit */
       }
    }
 
@@ -952,13 +857,11 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
       rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
    }
 
-   texobj->dirty_state &= ~(1<<unit);
+   rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
 }
 
 
-
-
-static void set_texgen_matrix( radeonContextPtr rmesa, 
+static void set_texgen_matrix( r100ContextPtr rmesa, 
 			       GLuint unit,
 			       const GLfloat *s_plane,
 			       const GLfloat *t_plane,
@@ -986,14 +889,14 @@ static void set_texgen_matrix( radeonContextPtr rmesa,
    rmesa->TexGenMatrix[unit].m[15] = q_plane[3];
 
    rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE << unit;
-   rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+   rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
 }
 
 /* Returns GL_FALSE if fallback required.
  */
 static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
    GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
    GLuint tmp = rmesa->TexGenEnabled;
@@ -1030,7 +933,7 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
 	    (texUnit->GenS.Mode != texUnit->GenQ.Mode)) ) {
 	 /* Mixed modes, fallback:
 	  */
-	 if (RADEON_DEBUG & DEBUG_FALLBACKS)
+	 if (RADEON_DEBUG & RADEON_FALLBACKS)
 	    fprintf(stderr, "fallback mixed texgen\n");
 	 return GL_FALSE;
       }
@@ -1038,7 +941,7 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
    }
    else {
    /* some texgen mode not including both S and T bits */
-      if (RADEON_DEBUG & DEBUG_FALLBACKS)
+      if (RADEON_DEBUG & RADEON_FALLBACKS)
 	 fprintf(stderr, "fallback mixed texgen/nontexgen\n");
       return GL_FALSE;
    }
@@ -1088,289 +991,195 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
    default:
       /* Unsupported mode, fallback:
        */
-      if (RADEON_DEBUG & DEBUG_FALLBACKS) 
+      if (RADEON_DEBUG & RADEON_FALLBACKS)
 	 fprintf(stderr, "fallback GL_SPHERE_MAP\n");
       return GL_FALSE;
    }
 
    if (tmp != rmesa->TexGenEnabled) {
-      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
    }
 
    return GL_TRUE;
 }
 
-
-static void disable_tex( GLcontext *ctx, int unit )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit)) {
-      /* Texture unit disabled */
-      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
-	 /* The old texture is no longer bound to this texture unit.
-	  * Mark it as such.
-	  */
-
-	 rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
-	 rmesa->state.texture.unit[unit].texobj = NULL;
-      }
-
-      RADEON_STATECHANGE( rmesa, ctx );
-      rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= 
-	  ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit);
-
-      RADEON_STATECHANGE( rmesa, tcl );
-      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
-						RADEON_Q_BIT(unit));
-
-      if (rmesa->TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
-	 TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
-	 rmesa->recheck_texgen[unit] = GL_TRUE;
-      }
-
-      if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
-      /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
-         cubic_map bit on unit 2 when the unit is disabled, otherwise every
-	 2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
-	 units, better be safe than sorry though).*/
-	 RADEON_STATECHANGE( rmesa, tex[unit] );
-	 rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
-      }
-
-      {
-	 GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
-	 GLuint tmp = rmesa->TexGenEnabled;
-
-	 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
-	 rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
-	 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
-	 rmesa->TexGenNeedNormals[unit] = 0;
-	 rmesa->TexGenEnabled |= 
-	     (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
-
-	 if (tmp != rmesa->TexGenEnabled) {
-	    rmesa->recheck_texgen[unit] = GL_TRUE;
-	    rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
-	 }
-      }
-   }
-}
-
-static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
-
-   /* Need to load the 2d images associated with this unit.
-    */
-   if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
-      t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
-      t->base.dirty_images[0] = ~0;
-   }
-
-   ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
-
-   if ( t->base.dirty_images[0] ) {
-      RADEON_FIREVERTICES( rmesa );
-      radeonSetTexImages( rmesa, tObj );
-      radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
-      if ( !t->base.memBlock && !t->image_override ) 
-	return GL_FALSE;
-   }
-
-   return GL_TRUE;
-}
-
-static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
+/**
+ * Compute the cached hardware register values for the given texture object.
+ *
+ * \param rmesa Context pointer
+ * \param t the r300 texture object
+ */
+static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit)
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
-   GLuint face;
+   const struct gl_texture_image *firstImage;
+   GLint log2Width, log2Height, log2Depth, texelBytes;
 
-   /* Need to load the 2d images associated with this unit.
-    */
-   if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
-      t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
-      for (face = 0; face < 6; face++)
-         t->base.dirty_images[face] = ~0;
+   if ( t->bo ) {
+	return GL_TRUE;
    }
 
-   ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
+   firstImage = t->base.Image[0][t->mt->firstLevel];   
 
-   if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
-        t->base.dirty_images[2] || t->base.dirty_images[3] ||
-        t->base.dirty_images[4] || t->base.dirty_images[5] ) {
-      /* flush */
-      RADEON_FIREVERTICES( rmesa );
-      /* layout memory space, once for all faces */
-      radeonSetTexImages( rmesa, tObj );
+   if (firstImage->Border > 0) {
+      fprintf(stderr, "%s: border\n", __FUNCTION__);
+      return GL_FALSE;
    }
 
-   /* upload (per face) */
-   for (face = 0; face < 6; face++) {
-      if (t->base.dirty_images[face]) {
-         radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, face );
+   log2Width  = firstImage->WidthLog2;
+   log2Height = firstImage->HeightLog2;
+   log2Depth  = firstImage->DepthLog2;
+   texelBytes = firstImage->TexFormat->TexelBytes;
+
+   if (!t->image_override) {
+      if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
+	const struct tx_table *table = tx_table;
+
+	 t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
+			     RADEON_TXFORMAT_ALPHA_IN_MAP);
+	 t->pp_txfilter &= ~RADEON_YUV_TO_RGB;	 
+	 
+	 t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format;
+	 t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter;
+      } else {
+	 _mesa_problem(NULL, "unexpected texture format in %s",
+		       __FUNCTION__);
+	 return GL_FALSE;
       }
    }
-      
-   if ( !t->base.memBlock ) {
-      /* texmem alloc failed, use s/w fallback */
-      return GL_FALSE;
+   
+   t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
+   t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << RADEON_MAX_MIP_LEVEL_SHIFT;
+	
+   t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
+		       RADEON_TXFORMAT_HEIGHT_MASK |
+		       RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
+		       RADEON_TXFORMAT_F5_WIDTH_MASK |
+		       RADEON_TXFORMAT_F5_HEIGHT_MASK);
+   t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
+		      (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
+   
+   t->tile_bits = 0;
+   
+   if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
+      ASSERT(log2Width == log2Height);
+      t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
+			 (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
+			 /* don't think we need this bit, if it exists at all - fglrx does not set it */
+			 (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
+      t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
+                           (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
+                           (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
+                           (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
+                           (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
+                           (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
+                           (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
+                           (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
    }
 
-   return GL_TRUE;
-}
-
-static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
+   t->pp_txsize = (((firstImage->Width - 1) << RADEON_TEX_USIZE_SHIFT)
+		   | ((firstImage->Height - 1) << RADEON_TEX_VSIZE_SHIFT));
 
-   if (!(t->pp_txformat & RADEON_TXFORMAT_NON_POWER2)) {
-      t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
-      t->base.dirty_images[0] = ~0;
+   if ( !t->image_override ) {
+      if (firstImage->IsCompressed)
+         t->pp_txpitch = (firstImage->Width + 63) & ~(63);
+      else
+         t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
+      t->pp_txpitch -= 32;
    }
 
-   ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
-
-   if ( t->base.dirty_images[0] ) {
-      RADEON_FIREVERTICES( rmesa );
-      radeonSetTexImages( rmesa, tObj );
-      radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
-      if ( !t->base.memBlock &&
-           !t->image_override /* && !rmesa->prefer_gart_client_texturing  FIXME */ ) {
-	 fprintf(stderr, "%s: upload failed\n", __FUNCTION__);
-	 return GL_FALSE;
-      }
+   if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
+      t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
    }
 
    return GL_TRUE;
 }
 
-
-static GLboolean update_tex_common( GLcontext *ctx, int unit )
+static GLboolean radeon_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit)
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
-   GLenum format;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   radeonTexObj *t = radeon_tex_obj(texObj);
+   int ret;
 
-   /* Fallback if there's a texture border */
-   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
-      fprintf(stderr, "%s: border\n", __FUNCTION__);
+   if (!radeon_validate_texture_miptree(ctx, texObj))
       return GL_FALSE;
-   }
+
+   ret = setup_hardware_state(rmesa, t, unit);
+   if (ret == GL_FALSE)
+     return GL_FALSE;
+
    /* yuv conversion only works in first unit */
    if (unit != 0 && (t->pp_txfilter & RADEON_YUV_TO_RGB))
       return GL_FALSE;
 
-   /* Update state if this is a different texture object to last
-    * time.
-    */
-   if ( rmesa->state.texture.unit[unit].texobj != t ) {
-      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
-	 /* The old texture is no longer bound to this texture unit.
-	  * Mark it as such.
-	  */
-
-	 rmesa->state.texture.unit[unit].texobj->base.bound &= 
-	     ~(1UL << unit);
-      }
-
-      rmesa->state.texture.unit[unit].texobj = t;
-      t->base.bound |= (1UL << unit);
-      t->dirty_state |= 1<<unit;
-      driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
-   }
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= 
+     (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
 
+   RADEON_STATECHANGE( rmesa, tcl );
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
 
-   /* Newly enabled?
-    */
-   if ( !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit))) {
-      RADEON_STATECHANGE( rmesa, ctx );
-      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= 
-	  (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
+   rmesa->recheck_texgen[unit] = GL_TRUE;
 
-      RADEON_STATECHANGE( rmesa, tcl );
-
-      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
-
-      rmesa->recheck_texgen[unit] = GL_TRUE;
-   }
-
-   if (t->dirty_state & (1<<unit)) {
-      import_tex_obj_state( rmesa, unit, t );
-      /* may need to update texture matrix (for texrect adjustments) */
-      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
-   }
+   import_tex_obj_state( rmesa, unit, t );
 
    if (rmesa->recheck_texgen[unit]) {
       GLboolean fallback = !radeon_validate_texgen( ctx, unit );
       TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
       rmesa->recheck_texgen[unit] = 0;
-      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
    }
 
-   format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
-   if ( rmesa->state.texture.unit[unit].format != format ||
-	rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) {
-      rmesa->state.texture.unit[unit].format = format;
-      rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode;
-      if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
-	 return GL_FALSE;
-      }
+   if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
+     return GL_FALSE;
    }
-
    FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
+
+   t->validated = GL_TRUE;
    return !t->border_fallback;
 }
 
-
-
 static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit )
 {
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
-   if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) {
-      return (enable_tex_rect( ctx, unit ) &&
-	      update_tex_common( ctx, unit ));
-   }
-   else if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
-      return (enable_tex_2d( ctx, unit ) &&
-	      update_tex_common( ctx, unit ));
-   }
-   else if ( texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT) ) {
-      return (enable_tex_cube( ctx, unit ) &&
-	      update_tex_common( ctx, unit ));
+   if (ctx->Texture.Unit[unit]._ReallyEnabled & TEXTURE_3D_BIT) {
+     rmesa->state.texture.unit[unit].texobj = NULL;
+     return GL_FALSE;
    }
-   else if ( texUnit->_ReallyEnabled ) {
-      return GL_FALSE;
+
+   if (!ctx->Texture.Unit[unit]._ReallyEnabled) {
+     /* disable the unit */
+     disable_tex_obj_state(rmesa, unit);
+     rmesa->state.texture.unit[unit].texobj = NULL;
+     return GL_TRUE;
    }
-   else {
-      disable_tex( ctx, unit );
-      return GL_TRUE;
+
+   if (!radeon_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
+    _mesa_warning(ctx,
+		  "failed to validate texture for unit %d.\n",
+		  unit);
+     rmesa->state.texture.unit[unit].texobj = NULL;
+     return GL_FALSE;
    }
+   rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
+   return GL_TRUE;
 }
 
 void radeonUpdateTextureState( GLcontext *ctx )
 {
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
    GLboolean ok;
 
+   /* set the ctx all textures off */
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~((RADEON_TEX_ENABLE_MASK) | (RADEON_TEX_BLEND_ENABLE_MASK));
+
    ok = (radeonUpdateTextureUnit( ctx, 0 ) &&
 	 radeonUpdateTextureUnit( ctx, 1 ) &&
 	 radeonUpdateTextureUnit( ctx, 2 ));
 
    FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok );
 
-   if (rmesa->TclFallback)
+   if (rmesa->radeon.TclFallback)
       radeonChooseVertexState( ctx );
 }
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
new file mode 100644
index 00000000000..fad3d1cedaf
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -0,0 +1,1052 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ * Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+ *
+ * The Weather Channel (TM) funded Tungsten Graphics to develop the
+ * initial release of the Radeon 8500 driver under the XFree86 license.
+ * This notice must be preserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/convolve.h"
+#include "main/mipmap.h"
+#include "main/texcompress.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/texgetimage.h"
+
+#include "xmlpool.h"		/* for symbolic values of enum-type options */
+
+#include "radeon_common.h"
+
+#include "radeon_mipmap_tree.h"
+
+
+static void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride,
+	GLuint numrows, GLuint rowsize)
+{
+	assert(rowsize <= dststride);
+	assert(rowsize <= srcstride);
+
+	if (rowsize == srcstride && rowsize == dststride) {
+		memcpy(dst, src, numrows*rowsize);
+	} else {
+		GLuint i;
+		for(i = 0; i < numrows; ++i) {
+			memcpy(dst, src, rowsize);
+			dst += dststride;
+			src += srcstride;
+		}
+	}
+}
+
+/* textures */
+/**
+ * Allocate an empty texture image object.
+ */
+struct gl_texture_image *radeonNewTextureImage(GLcontext *ctx)
+{
+	return CALLOC(sizeof(radeon_texture_image));
+}
+
+/**
+ * Free memory associated with this texture image.
+ */
+void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage)
+{
+	radeon_texture_image* image = get_radeon_texture_image(timage);
+
+	if (image->mt) {
+		radeon_miptree_unreference(image->mt);
+		image->mt = 0;
+		assert(!image->base.Data);
+	} else {
+		_mesa_free_texture_image_data(ctx, timage);
+	}
+	if (image->bo) {
+		radeon_bo_unref(image->bo);
+		image->bo = NULL;
+	}
+	if (timage->Data) {
+		_mesa_free_texmemory(timage->Data);
+		timage->Data = NULL;
+	}
+}
+
+/* Set Data pointer and additional data for mapped texture image */
+static void teximage_set_map_data(radeon_texture_image *image)
+{
+	radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
+
+	image->base.Data = image->mt->bo->ptr + lvl->faces[image->mtface].offset;
+	image->base.RowStride = lvl->rowstride / image->mt->bpp;
+}
+
+
+/**
+ * Map a single texture image for glTexImage and friends.
+ */
+void radeon_teximage_map(radeon_texture_image *image, GLboolean write_enable)
+{
+	if (image->mt) {
+		assert(!image->base.Data);
+
+		radeon_bo_map(image->mt->bo, write_enable);
+		teximage_set_map_data(image);
+	}
+}
+
+
+void radeon_teximage_unmap(radeon_texture_image *image)
+{
+	if (image->mt) {
+		assert(image->base.Data);
+
+		image->base.Data = 0;
+		radeon_bo_unmap(image->mt->bo);
+	}
+}
+
+static void map_override(GLcontext *ctx, radeonTexObj *t)
+{
+	radeon_texture_image *img = get_radeon_texture_image(t->base.Image[0][0]);
+
+	radeon_bo_map(t->bo, GL_FALSE);
+
+	img->base.Data = t->bo->ptr;
+	_mesa_set_fetch_functions(&img->base, 2);
+}
+
+static void unmap_override(GLcontext *ctx, radeonTexObj *t)
+{
+	radeon_texture_image *img = get_radeon_texture_image(t->base.Image[0][0]);
+
+	radeon_bo_unmap(t->bo);
+
+	img->base.Data = NULL;
+}
+
+/**
+ * Map a validated texture for reading during software rendering.
+ */
+void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
+{
+	radeonTexObj* t = radeon_tex_obj(texObj);
+	int face, level;
+
+	if (!radeon_validate_texture_miptree(ctx, texObj))
+	  return;
+
+	/* for r100 3D sw fallbacks don't have mt */
+	if (t->image_override && t->bo)
+		map_override(ctx, t);
+
+	if (!t->mt)
+		return;
+
+	radeon_bo_map(t->mt->bo, GL_FALSE);
+	for(face = 0; face < t->mt->faces; ++face) {
+		for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level)
+			teximage_set_map_data(get_radeon_texture_image(texObj->Image[face][level]));
+	}
+}
+
+void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
+{
+	radeonTexObj* t = radeon_tex_obj(texObj);
+	int face, level;
+
+	if (t->image_override && t->bo)
+		unmap_override(ctx, t);
+	/* for r100 3D sw fallbacks don't have mt */
+	if (!t->mt)
+	  return;
+
+	for(face = 0; face < t->mt->faces; ++face) {
+		for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level)
+			texObj->Image[face][level]->Data = 0;
+	}
+	radeon_bo_unmap(t->mt->bo);
+}
+
+GLuint radeon_face_for_target(GLenum target)
+{
+	switch (target) {
+	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+		return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+	default:
+		return 0;
+	}
+}
+
+/**
+ * Wraps Mesa's implementation to ensure that the base level image is mapped.
+ *
+ * This relies on internal details of _mesa_generate_mipmap, in particular
+ * the fact that the memory for recreated texture images is always freed.
+ */
+static void radeon_generate_mipmap(GLcontext *ctx, GLenum target,
+				   struct gl_texture_object *texObj)
+{
+	radeonTexObj* t = radeon_tex_obj(texObj);
+	GLuint nr_faces = (t->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+	int i, face;
+
+
+	_mesa_generate_mipmap(ctx, target, texObj);
+
+	for (face = 0; face < nr_faces; face++) {
+		for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
+			radeon_texture_image *image;
+
+			image = get_radeon_texture_image(texObj->Image[face][i]);
+
+			if (image == NULL)
+				break;
+
+			image->mtlevel = i;
+			image->mtface = face;
+
+			radeon_miptree_unreference(image->mt);
+			image->mt = NULL;
+		}
+	}
+	
+}
+
+void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj)
+{
+	GLuint face = radeon_face_for_target(target);
+	radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[face][texObj->BaseLevel]);
+
+	radeon_teximage_map(baseimage, GL_FALSE);
+	radeon_generate_mipmap(ctx, target, texObj);
+	radeon_teximage_unmap(baseimage);
+}
+
+
+/* try to find a format which will only need a memcopy */
+static const struct gl_texture_format *radeonChoose8888TexFormat(radeonContextPtr rmesa,
+								 GLenum srcFormat,
+								 GLenum srcType, GLboolean fbo)
+{
+	const GLuint ui = 1;
+	const GLubyte littleEndian = *((const GLubyte *)&ui);
+
+	/* r100 can only do this */
+	if (IS_R100_CLASS(rmesa->radeonScreen) || fbo)
+	  return _dri_texformat_argb8888;
+
+	if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+	    (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+	    (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+	    (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
+		return &_mesa_texformat_rgba8888;
+	} else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+		   (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+		   (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+		   (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
+		return &_mesa_texformat_rgba8888_rev;
+	} else if (IS_R200_CLASS(rmesa->radeonScreen)) {
+		return _dri_texformat_argb8888;
+	} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+					    srcType == GL_UNSIGNED_INT_8_8_8_8)) {
+		return &_mesa_texformat_argb8888_rev;
+	} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+					    srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
+		return &_mesa_texformat_argb8888;
+	} else
+		return _dri_texformat_argb8888;
+}
+
+const struct gl_texture_format *radeonChooseTextureFormat_mesa(GLcontext * ctx,
+							  GLint internalFormat,
+							  GLenum format,
+							  GLenum type)
+{
+	return radeonChooseTextureFormat(ctx, internalFormat, format,
+					 type, 0);
+}
+
+const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx,
+							  GLint internalFormat,
+							  GLenum format,
+							  GLenum type, GLboolean fbo)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	const GLboolean do32bpt =
+	    (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32);
+	const GLboolean force16bpt =
+	    (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16);
+	(void)format;
+
+#if 0
+	fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n",
+		_mesa_lookup_enum_by_nr(internalFormat), internalFormat,
+		_mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
+	fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt);
+#endif
+
+	switch (internalFormat) {
+	case 4:
+	case GL_RGBA:
+	case GL_COMPRESSED_RGBA:
+		switch (type) {
+		case GL_UNSIGNED_INT_10_10_10_2:
+		case GL_UNSIGNED_INT_2_10_10_10_REV:
+			return do32bpt ? _dri_texformat_argb8888 :
+			    _dri_texformat_argb1555;
+		case GL_UNSIGNED_SHORT_4_4_4_4:
+		case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+			return _dri_texformat_argb4444;
+		case GL_UNSIGNED_SHORT_5_5_5_1:
+		case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+			return _dri_texformat_argb1555;
+		default:
+			return do32bpt ? radeonChoose8888TexFormat(rmesa, format, type, fbo) :
+			    _dri_texformat_argb4444;
+		}
+
+	case 3:
+	case GL_RGB:
+	case GL_COMPRESSED_RGB:
+		switch (type) {
+		case GL_UNSIGNED_SHORT_4_4_4_4:
+		case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+			return _dri_texformat_argb4444;
+		case GL_UNSIGNED_SHORT_5_5_5_1:
+		case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+			return _dri_texformat_argb1555;
+		case GL_UNSIGNED_SHORT_5_6_5:
+		case GL_UNSIGNED_SHORT_5_6_5_REV:
+			return _dri_texformat_rgb565;
+		default:
+			return do32bpt ? _dri_texformat_argb8888 :
+			    _dri_texformat_rgb565;
+		}
+
+	case GL_RGBA8:
+	case GL_RGB10_A2:
+	case GL_RGBA12:
+	case GL_RGBA16:
+		return !force16bpt ?
+			radeonChoose8888TexFormat(rmesa, format, type, fbo) :
+			_dri_texformat_argb4444;
+
+	case GL_RGBA4:
+	case GL_RGBA2:
+		return _dri_texformat_argb4444;
+
+	case GL_RGB5_A1:
+		return _dri_texformat_argb1555;
+
+	case GL_RGB8:
+	case GL_RGB10:
+	case GL_RGB12:
+	case GL_RGB16:
+		return !force16bpt ? _dri_texformat_argb8888 :
+		    _dri_texformat_rgb565;
+
+	case GL_RGB5:
+	case GL_RGB4:
+	case GL_R3_G3_B2:
+		return _dri_texformat_rgb565;
+
+	case GL_ALPHA:
+	case GL_ALPHA4:
+	case GL_ALPHA8:
+	case GL_ALPHA12:
+	case GL_ALPHA16:
+	case GL_COMPRESSED_ALPHA:
+		/* r200: can't use a8 format since interpreting hw I8 as a8 would result
+		   in wrong rgb values (same as alpha value instead of 0). */
+		if (IS_R200_CLASS(rmesa->radeonScreen))
+			return _dri_texformat_al88;
+		else
+			return _dri_texformat_a8;
+	case 1:
+	case GL_LUMINANCE:
+	case GL_LUMINANCE4:
+	case GL_LUMINANCE8:
+	case GL_LUMINANCE12:
+	case GL_LUMINANCE16:
+	case GL_COMPRESSED_LUMINANCE:
+		return _dri_texformat_l8;
+
+	case 2:
+	case GL_LUMINANCE_ALPHA:
+	case GL_LUMINANCE4_ALPHA4:
+	case GL_LUMINANCE6_ALPHA2:
+	case GL_LUMINANCE8_ALPHA8:
+	case GL_LUMINANCE12_ALPHA4:
+	case GL_LUMINANCE12_ALPHA12:
+	case GL_LUMINANCE16_ALPHA16:
+	case GL_COMPRESSED_LUMINANCE_ALPHA:
+		return _dri_texformat_al88;
+
+	case GL_INTENSITY:
+	case GL_INTENSITY4:
+	case GL_INTENSITY8:
+	case GL_INTENSITY12:
+	case GL_INTENSITY16:
+	case GL_COMPRESSED_INTENSITY:
+		return _dri_texformat_i8;
+
+	case GL_YCBCR_MESA:
+		if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+		    type == GL_UNSIGNED_BYTE)
+			return &_mesa_texformat_ycbcr;
+		else
+			return &_mesa_texformat_ycbcr_rev;
+
+	case GL_RGB_S3TC:
+	case GL_RGB4_S3TC:
+	case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+		return &_mesa_texformat_rgb_dxt1;
+
+	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+		return &_mesa_texformat_rgba_dxt1;
+
+	case GL_RGBA_S3TC:
+	case GL_RGBA4_S3TC:
+	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+		return &_mesa_texformat_rgba_dxt3;
+
+	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+		return &_mesa_texformat_rgba_dxt5;
+
+	case GL_ALPHA16F_ARB:
+		return &_mesa_texformat_alpha_float16;
+	case GL_ALPHA32F_ARB:
+		return &_mesa_texformat_alpha_float32;
+	case GL_LUMINANCE16F_ARB:
+		return &_mesa_texformat_luminance_float16;
+	case GL_LUMINANCE32F_ARB:
+		return &_mesa_texformat_luminance_float32;
+	case GL_LUMINANCE_ALPHA16F_ARB:
+		return &_mesa_texformat_luminance_alpha_float16;
+	case GL_LUMINANCE_ALPHA32F_ARB:
+		return &_mesa_texformat_luminance_alpha_float32;
+	case GL_INTENSITY16F_ARB:
+		return &_mesa_texformat_intensity_float16;
+	case GL_INTENSITY32F_ARB:
+		return &_mesa_texformat_intensity_float32;
+	case GL_RGB16F_ARB:
+		return &_mesa_texformat_rgba_float16;
+	case GL_RGB32F_ARB:
+		return &_mesa_texformat_rgba_float32;
+	case GL_RGBA16F_ARB:
+		return &_mesa_texformat_rgba_float16;
+	case GL_RGBA32F_ARB:
+		return &_mesa_texformat_rgba_float32;
+
+	case GL_DEPTH_COMPONENT:
+	case GL_DEPTH_COMPONENT16:
+	case GL_DEPTH_COMPONENT24:
+	case GL_DEPTH_COMPONENT32:
+	case GL_DEPTH_STENCIL_EXT:
+	case GL_DEPTH24_STENCIL8_EXT:
+		return &_mesa_texformat_s8_z24;
+
+	/* EXT_texture_sRGB */
+	case GL_SRGB:
+	case GL_SRGB8:
+	case GL_SRGB_ALPHA:
+	case GL_SRGB8_ALPHA8:
+	case GL_COMPRESSED_SRGB:
+	case GL_COMPRESSED_SRGB_ALPHA:
+		return &_mesa_texformat_srgba8;
+
+	case GL_SLUMINANCE:
+	case GL_SLUMINANCE8:
+	case GL_COMPRESSED_SLUMINANCE:
+		return &_mesa_texformat_sl8;
+
+	case GL_SLUMINANCE_ALPHA:
+	case GL_SLUMINANCE8_ALPHA8:
+	case GL_COMPRESSED_SLUMINANCE_ALPHA:
+		return &_mesa_texformat_sla8;
+
+	default:
+		_mesa_problem(ctx,
+			      "unexpected internalFormat 0x%x in %s",
+			      (int)internalFormat, __func__);
+		return NULL;
+	}
+
+	return NULL;		/* never get here */
+}
+
+/**
+ * All glTexImage calls go through this function.
+ */
+static void radeon_teximage(
+	GLcontext *ctx, int dims,
+	GLenum target, GLint level,
+	GLint internalFormat,
+	GLint width, GLint height, GLint depth,
+	GLsizei imageSize,
+	GLenum format, GLenum type, const GLvoid * pixels,
+	const struct gl_pixelstore_attrib *packing,
+	struct gl_texture_object *texObj,
+	struct gl_texture_image *texImage,
+	int compressed)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	radeonTexObj* t = radeon_tex_obj(texObj);
+	radeon_texture_image* image = get_radeon_texture_image(texImage);
+	GLuint dstRowStride;
+	GLint postConvWidth = width;
+	GLint postConvHeight = height;
+	GLuint texelBytes;
+	GLuint face = radeon_face_for_target(target);
+
+	radeon_firevertices(rmesa);
+
+	t->validated = GL_FALSE;
+
+	if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) {
+	       _mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth,
+						  &postConvHeight);
+	}
+
+	/* Choose and fill in the texture format for this image */
+	texImage->TexFormat = radeonChooseTextureFormat(ctx, internalFormat, format, type, 0);
+	_mesa_set_fetch_functions(texImage, dims);
+
+	if (texImage->TexFormat->TexelBytes == 0) {
+		texelBytes = 0;
+		texImage->IsCompressed = GL_TRUE;
+		texImage->CompressedSize =
+			ctx->Driver.CompressedTextureSize(ctx, texImage->Width,
+					   texImage->Height, texImage->Depth,
+					   texImage->TexFormat->MesaFormat);
+	} else {
+		texImage->IsCompressed = GL_FALSE;
+		texImage->CompressedSize = 0;
+
+		texelBytes = texImage->TexFormat->TexelBytes;
+		/* Minimum pitch of 32 bytes */
+		if (postConvWidth * texelBytes < 32) {
+		  postConvWidth = 32 / texelBytes;
+		  texImage->RowStride = postConvWidth;
+		}
+		if (!image->mt) {      
+			assert(texImage->RowStride == postConvWidth);
+		}
+	}
+
+	/* Allocate memory for image */
+	radeonFreeTexImageData(ctx, texImage); /* Mesa core only clears texImage->Data but not image->mt */
+
+	if (t->mt &&
+	    t->mt->firstLevel == level &&
+	    t->mt->lastLevel == level &&
+	    t->mt->target != GL_TEXTURE_CUBE_MAP_ARB &&
+	    !radeon_miptree_matches_image(t->mt, texImage, face, level)) {
+	  radeon_miptree_unreference(t->mt);
+	  t->mt = NULL;
+	}
+
+	if (!t->mt)
+		radeon_try_alloc_miptree(rmesa, t, image, face, level);
+	if (t->mt && radeon_miptree_matches_image(t->mt, texImage, face, level)) {
+		radeon_mipmap_level *lvl;
+		image->mt = t->mt;
+		image->mtlevel = level - t->mt->firstLevel;
+		image->mtface = face;
+		radeon_miptree_reference(t->mt);
+		lvl = &image->mt->levels[image->mtlevel];
+		dstRowStride = lvl->rowstride;
+	} else {
+		int size;
+		if (texImage->IsCompressed) {
+			size = texImage->CompressedSize;
+		} else {
+			size = texImage->Width * texImage->Height * texImage->Depth * texImage->TexFormat->TexelBytes;
+		}
+		texImage->Data = _mesa_alloc_texmemory(size);
+	}
+
+	/* Upload texture image; note that the spec allows pixels to be NULL */
+	if (compressed) {
+		pixels = _mesa_validate_pbo_compressed_teximage(
+			ctx, imageSize, pixels, packing, "glCompressedTexImage");
+	} else {
+		pixels = _mesa_validate_pbo_teximage(
+			ctx, dims, width, height, depth,
+			format, type, pixels, packing, "glTexImage");
+	}
+
+	if (pixels) {
+		radeon_teximage_map(image, GL_TRUE);
+		if (compressed) {
+			if (image->mt) {
+				uint32_t srcRowStride, bytesPerRow, rows;
+				srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width);
+				bytesPerRow = srcRowStride;
+				rows = (height + 3) / 4;
+				copy_rows(texImage->Data, image->mt->levels[level].rowstride,
+					  pixels, srcRowStride, rows, bytesPerRow);
+			} else {
+				memcpy(texImage->Data, pixels, imageSize);
+			}
+		} else {
+			GLuint dstRowStride;
+			GLuint *dstImageOffsets;
+
+			if (image->mt) {
+				radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
+				dstRowStride = lvl->rowstride;
+			} else {
+				dstRowStride = texImage->Width * texImage->TexFormat->TexelBytes;
+			}
+
+			if (dims == 3) {
+				int i;
+
+				dstImageOffsets = _mesa_malloc(depth * sizeof(GLuint)) ;
+				if (!dstImageOffsets)
+					_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+
+				for (i = 0; i < depth; ++i) {
+					dstImageOffsets[i] = dstRowStride/texImage->TexFormat->TexelBytes * height * i;
+				}
+			} else {
+				dstImageOffsets = texImage->ImageOffsets;
+			}
+
+			if (!texImage->TexFormat->StoreImage(ctx, dims,
+						texImage->_BaseFormat,
+						texImage->TexFormat,
+						texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */
+						dstRowStride,
+						dstImageOffsets,
+						width, height, depth,
+						format, type, pixels, packing))
+				_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+
+			if (dims == 3)
+				_mesa_free(dstImageOffsets);
+		}
+
+		/* SGIS_generate_mipmap */
+		if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+			radeon_generate_mipmap(ctx, target, texObj);
+		}
+	}
+
+	_mesa_unmap_teximage_pbo(ctx, packing);
+
+	if (pixels)
+	  radeon_teximage_unmap(image);
+
+
+}
+
+void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level,
+		      GLint internalFormat,
+		      GLint width, GLint border,
+		      GLenum format, GLenum type, const GLvoid * pixels,
+		      const struct gl_pixelstore_attrib *packing,
+		      struct gl_texture_object *texObj,
+		      struct gl_texture_image *texImage)
+{
+	radeon_teximage(ctx, 1, target, level, internalFormat, width, 1, 1,
+		0, format, type, pixels, packing, texObj, texImage, 0);
+}
+
+void radeonTexImage2D(GLcontext * ctx, GLenum target, GLint level,
+			   GLint internalFormat,
+			   GLint width, GLint height, GLint border,
+			   GLenum format, GLenum type, const GLvoid * pixels,
+			   const struct gl_pixelstore_attrib *packing,
+			   struct gl_texture_object *texObj,
+			   struct gl_texture_image *texImage)
+
+{
+	radeon_teximage(ctx, 2, target, level, internalFormat, width, height, 1,
+		0, format, type, pixels, packing, texObj, texImage, 0);
+}
+
+void radeonCompressedTexImage2D(GLcontext * ctx, GLenum target,
+				     GLint level, GLint internalFormat,
+				     GLint width, GLint height, GLint border,
+				     GLsizei imageSize, const GLvoid * data,
+				     struct gl_texture_object *texObj,
+				     struct gl_texture_image *texImage)
+{
+	radeon_teximage(ctx, 2, target, level, internalFormat, width, height, 1,
+		imageSize, 0, 0, data, &ctx->Unpack, texObj, texImage, 1);
+}
+
+void radeonTexImage3D(GLcontext * ctx, GLenum target, GLint level,
+		      GLint internalFormat,
+		      GLint width, GLint height, GLint depth,
+		      GLint border,
+		      GLenum format, GLenum type, const GLvoid * pixels,
+		      const struct gl_pixelstore_attrib *packing,
+		      struct gl_texture_object *texObj,
+		      struct gl_texture_image *texImage)
+{
+	radeon_teximage(ctx, 3, target, level, internalFormat, width, height, depth,
+		0, format, type, pixels, packing, texObj, texImage, 0);
+}
+
+/**
+ * Update a subregion of the given texture image.
+ */
+static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int level,
+		GLint xoffset, GLint yoffset, GLint zoffset,
+		GLsizei width, GLsizei height, GLsizei depth,
+		GLsizei imageSize,
+		GLenum format, GLenum type,
+		const GLvoid * pixels,
+		const struct gl_pixelstore_attrib *packing,
+		struct gl_texture_object *texObj,
+		struct gl_texture_image *texImage,
+		int compressed)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	radeonTexObj* t = radeon_tex_obj(texObj);
+	radeon_texture_image* image = get_radeon_texture_image(texImage);
+
+	radeon_firevertices(rmesa);
+
+	t->validated = GL_FALSE;
+	if (compressed) {
+		pixels = _mesa_validate_pbo_compressed_teximage(
+			ctx, imageSize, pixels, packing, "glCompressedTexImage");
+	} else {
+		pixels = _mesa_validate_pbo_teximage(ctx, dims,
+			width, height, depth, format, type, pixels, packing, "glTexSubImage1D");
+	}
+
+	if (pixels) {
+		GLint dstRowStride;
+		radeon_teximage_map(image, GL_TRUE);
+
+		if (image->mt) {
+			radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
+			dstRowStride = lvl->rowstride;
+		} else {
+			dstRowStride = texImage->RowStride * texImage->TexFormat->TexelBytes;
+		}
+
+		if (compressed) {
+			uint32_t srcRowStride, bytesPerRow, rows;
+			GLubyte *img_start;
+			if (!image->mt) {
+				dstRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, texImage->Width);
+				img_start = _mesa_compressed_image_address(xoffset, yoffset, 0,
+									   texImage->TexFormat->MesaFormat,
+									   texImage->Width, texImage->Data);
+			}
+			else {
+				uint32_t blocks_x = dstRowStride / (image->mt->bpp * 4);
+				img_start = texImage->Data + image->mt->bpp * 4 * (blocks_x * (yoffset / 4) + xoffset / 4);
+			}
+			srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width);
+			bytesPerRow = srcRowStride;
+			rows = (height + 3) / 4;
+
+			copy_rows(img_start, dstRowStride,  pixels, srcRowStride, rows,  bytesPerRow);
+			
+		} else {
+			if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
+							     texImage->TexFormat, texImage->Data,
+							     xoffset, yoffset, zoffset,
+							     dstRowStride,
+							     texImage->ImageOffsets,
+							     width, height, depth,
+							     format, type, pixels, packing))
+				_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
+		}
+
+		/* GL_SGIS_generate_mipmap */
+		if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+			radeon_generate_mipmap(ctx, target, texObj);
+		}
+	}
+
+	radeon_teximage_unmap(image);
+
+	_mesa_unmap_teximage_pbo(ctx, packing);
+
+
+}
+
+void radeonTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+			 GLint xoffset,
+			 GLsizei width,
+			 GLenum format, GLenum type,
+			 const GLvoid * pixels,
+			 const struct gl_pixelstore_attrib *packing,
+			 struct gl_texture_object *texObj,
+			 struct gl_texture_image *texImage)
+{
+	radeon_texsubimage(ctx, 1, target, level, xoffset, 0, 0, width, 1, 1, 0,
+		format, type, pixels, packing, texObj, texImage, 0);
+}
+
+void radeonTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+			 GLint xoffset, GLint yoffset,
+			 GLsizei width, GLsizei height,
+			 GLenum format, GLenum type,
+			 const GLvoid * pixels,
+			 const struct gl_pixelstore_attrib *packing,
+			 struct gl_texture_object *texObj,
+			 struct gl_texture_image *texImage)
+{
+	radeon_texsubimage(ctx, 2, target, level, xoffset, yoffset, 0, width, height, 1,
+			   0, format, type, pixels, packing, texObj, texImage,
+			   0);
+}
+
+void radeonCompressedTexSubImage2D(GLcontext * ctx, GLenum target,
+				   GLint level, GLint xoffset,
+				   GLint yoffset, GLsizei width,
+				   GLsizei height, GLenum format,
+				   GLsizei imageSize, const GLvoid * data,
+				   struct gl_texture_object *texObj,
+				   struct gl_texture_image *texImage)
+{
+	radeon_texsubimage(ctx, 2, target, level, xoffset, yoffset, 0, width, height, 1,
+		imageSize, format, 0, data, &ctx->Unpack, texObj, texImage, 1);
+}
+
+
+void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
+			 GLint xoffset, GLint yoffset, GLint zoffset,
+			 GLsizei width, GLsizei height, GLsizei depth,
+			 GLenum format, GLenum type,
+			 const GLvoid * pixels,
+			 const struct gl_pixelstore_attrib *packing,
+			 struct gl_texture_object *texObj,
+			 struct gl_texture_image *texImage)
+{
+	radeon_texsubimage(ctx, 3, target, level, xoffset, yoffset, zoffset, width, height, depth, 0,
+		format, type, pixels, packing, texObj, texImage, 0);
+}
+
+
+
+/**
+ * Ensure that the given image is stored in the given miptree from now on.
+ */
+static void migrate_image_to_miptree(radeon_mipmap_tree *mt, radeon_texture_image *image, int face, int level)
+{
+	radeon_mipmap_level *dstlvl = &mt->levels[level - mt->firstLevel];
+	unsigned char *dest;
+
+	assert(image->mt != mt);
+	assert(dstlvl->width == image->base.Width);
+	assert(dstlvl->height == image->base.Height);
+	assert(dstlvl->depth == image->base.Depth);
+
+
+	radeon_bo_map(mt->bo, GL_TRUE);
+	dest = mt->bo->ptr + dstlvl->faces[face].offset;
+
+	if (image->mt) {
+		/* Format etc. should match, so we really just need a memcpy().
+		 * In fact, that memcpy() could be done by the hardware in many
+		 * cases, provided that we have a proper memory manager.
+		 */
+		radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel-image->mt->firstLevel];
+
+		assert(srclvl->size == dstlvl->size);
+		assert(srclvl->rowstride == dstlvl->rowstride);
+
+		radeon_bo_map(image->mt->bo, GL_FALSE);
+
+		memcpy(dest,
+			image->mt->bo->ptr + srclvl->faces[face].offset,
+			dstlvl->size);
+		radeon_bo_unmap(image->mt->bo);
+
+		radeon_miptree_unreference(image->mt);
+	} else {
+		uint32_t srcrowstride;
+		uint32_t height;
+		/* need to confirm this value is correct */
+		if (mt->compressed) {
+			height = (image->base.Height + 3) / 4;
+			srcrowstride = _mesa_compressed_row_stride(image->base.TexFormat->MesaFormat, image->base.Width);
+		} else {
+			height = image->base.Height * image->base.Depth;
+			srcrowstride = image->base.Width * image->base.TexFormat->TexelBytes;
+		}
+
+//		if (mt->tilebits)
+//			WARN_ONCE("%s: tiling not supported yet", __FUNCTION__);
+
+		copy_rows(dest, dstlvl->rowstride, image->base.Data, srcrowstride,
+			  height, srcrowstride);
+
+		_mesa_free_texmemory(image->base.Data);
+		image->base.Data = 0;
+	}
+
+	radeon_bo_unmap(mt->bo);
+
+	image->mt = mt;
+	image->mtface = face;
+	image->mtlevel = level;
+	radeon_miptree_reference(image->mt);
+}
+
+int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	radeonTexObj *t = radeon_tex_obj(texObj);
+	radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[0][texObj->BaseLevel]);
+	int face, level;
+
+	if (t->validated || t->image_override)
+		return GL_TRUE;
+
+	if (RADEON_DEBUG & RADEON_TEXTURE)
+		fprintf(stderr, "%s: Validating texture %p now\n", __FUNCTION__, texObj);
+
+	if (baseimage->base.Border > 0)
+		return GL_FALSE;
+
+	/* Ensure a matching miptree exists.
+	 *
+	 * Differing mipmap trees can result when the app uses TexImage to
+	 * change texture dimensions.
+	 *
+	 * Prefer to use base image's miptree if it
+	 * exists, since that most likely contains more valid data (remember
+	 * that the base level is usually significantly larger than the rest
+	 * of the miptree, so cubemaps are the only possible exception).
+	 */
+	if (baseimage->mt &&
+	    baseimage->mt != t->mt &&
+	    radeon_miptree_matches_texture(baseimage->mt, &t->base)) {
+		radeon_miptree_unreference(t->mt);
+		t->mt = baseimage->mt;
+		radeon_miptree_reference(t->mt);
+	} else if (t->mt && !radeon_miptree_matches_texture(t->mt, &t->base)) {
+		radeon_miptree_unreference(t->mt);
+		t->mt = 0;
+	}
+
+	if (!t->mt) {
+		if (RADEON_DEBUG & RADEON_TEXTURE)
+			fprintf(stderr, " Allocate new miptree\n");
+		radeon_try_alloc_miptree(rmesa, t, baseimage, 0, texObj->BaseLevel);
+		if (!t->mt) {
+			_mesa_problem(ctx, "radeon_validate_texture failed to alloc miptree");
+			return GL_FALSE;
+		}
+	}
+
+	/* Ensure all images are stored in the single main miptree */
+	for(face = 0; face < t->mt->faces; ++face) {
+		for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level) {
+			radeon_texture_image *image = get_radeon_texture_image(texObj->Image[face][level]);
+			if (RADEON_DEBUG & RADEON_TEXTURE)
+				fprintf(stderr, " face %i, level %i... %p vs %p ", face, level, t->mt, image->mt);
+			if (t->mt == image->mt) {
+				if (RADEON_DEBUG & RADEON_TEXTURE)
+					fprintf(stderr, "OK\n");
+
+				continue;
+			}
+
+			if (RADEON_DEBUG & RADEON_TEXTURE)
+				fprintf(stderr, "migrating\n");
+			migrate_image_to_miptree(t->mt, image, face, level);
+		}
+	}
+
+	return GL_TRUE;
+}
+
+
+/**
+ * Need to map texture image into memory before copying image data,
+ * then unmap it.
+ */
+static void
+radeon_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
+		     GLenum format, GLenum type, GLvoid * pixels,
+		     struct gl_texture_object *texObj,
+		     struct gl_texture_image *texImage, int compressed)
+{
+	radeon_texture_image *image = get_radeon_texture_image(texImage);
+
+	if (image->mt) {
+		/* Map the texture image read-only */
+		radeon_teximage_map(image, GL_FALSE);
+	} else {
+		/* Image hasn't been uploaded to a miptree yet */
+		assert(image->base.Data);
+	}
+
+	if (compressed) {
+		/* FIXME: this can't work for small textures (mips) which
+		         use different hw stride */
+		_mesa_get_compressed_teximage(ctx, target, level, pixels,
+					      texObj, texImage);
+	} else {
+		_mesa_get_teximage(ctx, target, level, format, type, pixels,
+				   texObj, texImage);
+	}
+     
+	if (image->mt) {
+		radeon_teximage_unmap(image);
+	}
+}
+
+void
+radeonGetTexImage(GLcontext * ctx, GLenum target, GLint level,
+		  GLenum format, GLenum type, GLvoid * pixels,
+		  struct gl_texture_object *texObj,
+		  struct gl_texture_image *texImage)
+{
+	radeon_get_tex_image(ctx, target, level, format, type, pixels,
+			     texObj, texImage, 0);
+}
+
+void
+radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
+			    GLvoid *pixels,
+			    struct gl_texture_object *texObj,
+			    struct gl_texture_image *texImage)
+{
+	radeon_get_tex_image(ctx, target, level, 0, 0, pixels,
+			     texObj, texImage, 1);
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h
new file mode 100644
index 00000000000..888a55ba911
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ * Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+ *
+ * The Weather Channel (TM) funded Tungsten Graphics to develop the
+ * initial release of the Radeon 8500 driver under the XFree86 license.
+ * This notice must be preserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_TEXTURE_H
+#define RADEON_TEXTURE_H
+struct gl_texture_image *radeonNewTextureImage(GLcontext *ctx);
+void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage);
+
+void radeon_teximage_map(radeon_texture_image *image, GLboolean write_enable);
+void radeon_teximage_unmap(radeon_texture_image *image);
+void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj);
+void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj);
+void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj);
+int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj);
+GLuint radeon_face_for_target(GLenum target);
+const struct gl_texture_format *radeonChooseTextureFormat_mesa(GLcontext * ctx,
+							  GLint internalFormat,
+							  GLenum format,
+							  GLenum type);
+const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx,
+							  GLint internalFormat,
+							  GLenum format,
+							  GLenum type, GLboolean fbo);
+
+void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level,
+		      GLint internalFormat,
+		      GLint width, GLint border,
+		      GLenum format, GLenum type, const GLvoid * pixels,
+		      const struct gl_pixelstore_attrib *packing,
+		      struct gl_texture_object *texObj,
+		      struct gl_texture_image *texImage);
+void radeonTexImage2D(GLcontext * ctx, GLenum target, GLint level,
+		      GLint internalFormat,
+		      GLint width, GLint height, GLint border,
+		      GLenum format, GLenum type, const GLvoid * pixels,
+		      const struct gl_pixelstore_attrib *packing,
+		      struct gl_texture_object *texObj,
+		      struct gl_texture_image *texImage);
+void radeonCompressedTexImage2D(GLcontext * ctx, GLenum target,
+				GLint level, GLint internalFormat,
+				GLint width, GLint height, GLint border,
+				GLsizei imageSize, const GLvoid * data,
+				struct gl_texture_object *texObj,
+				struct gl_texture_image *texImage);
+void radeonTexImage3D(GLcontext * ctx, GLenum target, GLint level,
+		      GLint internalFormat,
+		      GLint width, GLint height, GLint depth,
+		      GLint border,
+		      GLenum format, GLenum type, const GLvoid * pixels,
+		      const struct gl_pixelstore_attrib *packing,
+		      struct gl_texture_object *texObj,
+		      struct gl_texture_image *texImage);
+void radeonTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+			 GLint xoffset,
+			 GLsizei width,
+			 GLenum format, GLenum type,
+			 const GLvoid * pixels,
+			 const struct gl_pixelstore_attrib *packing,
+			 struct gl_texture_object *texObj,
+			 struct gl_texture_image *texImage);
+void radeonTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+				GLint xoffset, GLint yoffset,
+				GLsizei width, GLsizei height,
+				GLenum format, GLenum type,
+				const GLvoid * pixels,
+				const struct gl_pixelstore_attrib *packing,
+				struct gl_texture_object *texObj,
+				struct gl_texture_image *texImage);
+void radeonCompressedTexSubImage2D(GLcontext * ctx, GLenum target,
+				   GLint level, GLint xoffset,
+				   GLint yoffset, GLsizei width,
+				   GLsizei height, GLenum format,
+				   GLsizei imageSize, const GLvoid * data,
+				   struct gl_texture_object *texObj,
+				   struct gl_texture_image *texImage);
+
+void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
+			 GLint xoffset, GLint yoffset, GLint zoffset,
+			 GLsizei width, GLsizei height, GLsizei depth,
+			 GLenum format, GLenum type,
+			 const GLvoid * pixels,
+			 const struct gl_pixelstore_attrib *packing,
+			 struct gl_texture_object *texObj,
+			 struct gl_texture_image *texImage);
+
+void radeonGetTexImage(GLcontext * ctx, GLenum target, GLint level,
+		       GLenum format, GLenum type, GLvoid * pixels,
+		       struct gl_texture_object *texObj,
+		       struct gl_texture_image *texImage);
+void radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
+				 GLvoid *pixels,
+				 struct gl_texture_object *texObj,
+				 struct gl_texture_image *texImage);
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
index ae2ccdf292a..e81d7fdcd0e 100644
--- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h
+++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
@@ -1601,6 +1601,8 @@
 #       define RADEON_STENCIL_VALUE_MASK      (0xff << 16)
 #       define RADEON_STENCIL_WRITEMASK_SHIFT 24
 #       define RADEON_STENCIL_WRITE_MASK      (0xff << 24)
+#define RADEON_RB3D_ZPASS_DATA              0x3290
+#define RADEON_RB3D_ZPASS_ADDR              0x3294
 #define RADEON_RB3D_ZSTENCILCNTL            0x1c2c
 #       define RADEON_DEPTH_FORMAT_MASK          (0xf << 0)
 #       define RADEON_DEPTH_FORMAT_16BIT_INT_Z   (0  <<  0)
@@ -1661,6 +1663,9 @@
 #       define RADEON_FORCE_Z_DIRTY              (1  << 29)
 #       define RADEON_Z_WRITE_ENABLE             (1  << 30)
 #       define RADEON_Z_DECOMPRESSION_ENABLE     (1  << 31)
+
+#define RADEON_RE_STIPPLE_ADDR              0x1cc8
+#define RADEON_RE_STIPPLE_DATA              0x1ccc
 #define RADEON_RE_LINE_PATTERN              0x1cd0
 #       define RADEON_LINE_PATTERN_MASK             0x0000ffff
 #       define RADEON_LINE_REPEAT_COUNT_SHIFT       16
@@ -2031,6 +2036,9 @@
 #define RADEON_CP_PACKET3_3D_DRAW_INDX              0xC0002A00
 #define RADEON_CP_PACKET3_LOAD_PALETTE              0xC0002C00
 #define RADEON_CP_PACKET3_3D_LOAD_VBPNTR            0xC0002F00
+#define R200_CP_CMD_3D_DRAW_VBUF_2      0xC0003400
+#define R200_CP_CMD_3D_DRAW_IMMD_2      0xC0003500
+#define R200_CP_CMD_3D_DRAW_INDX_2      0xC0003600
 #define RADEON_CP_PACKET3_CNTL_PAINT                0xC0009100
 #define RADEON_CP_PACKET3_CNTL_BITBLT               0xC0009200
 #define RADEON_CP_PACKET3_CNTL_SMALLTEXT            0xC0009300