summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/nouveau/nv50
diff options
context:
space:
mode:
authorJohannes Obermayr <johannesobermayr@gmx.de>2013-08-20 20:14:00 +0200
committerChristoph Bumiller <e0425955@student.tuwien.ac.at>2013-09-11 21:47:07 +0200
commit5eb7ff1175a644ffe3b0f1a75cb235400355f9fb (patch)
tree613342591e12a96725df715853a5e579ba1ec8ea /src/gallium/drivers/nouveau/nv50
parentebcdaa7bbc3a10fe59447ae77b508ee85eaa582f (diff)
Move nv30, nv50 and nvc0 to nouveau.
It is planned to ship openSUSE 13.1 with -shared libs. nouveau.la, nv30.la, nv50.la and nvc0.la are currently LIBADDs in all nouveau related targets. This change makes it possible to easily build one shared libnouveau.so which is then LIBADDed. Also dlopen will be faster for one library instead of three and build time on -jX will be reduced. Whitespace fixes were requested by 'git am'. Signed-off-by: Johannes Obermayr <johannesobermayr@gmx.de> Acked-by: Christoph Bumiller <christoph.bumiller@speed.at> Acked-by: Ian Romanick <ian.d.romanick@intel.com>
Diffstat (limited to 'src/gallium/drivers/nouveau/nv50')
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h416
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h2110
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_3ddefs.xml.h98
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_blit.h223
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.c317
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.h322
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_debug.h25
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h200
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_draw.c88
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_formats.c504
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_miptree.c498
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.c445
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.h106
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_push.c309
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query.c399
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_resource.c104
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_resource.h153
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c845
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.h153
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_shader_state.c623
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c1110
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state_validate.c414
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_stateobj.h78
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h34
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_surface.c1353
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_tex.c352
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h306
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_transfer.c412
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_transfer.h27
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_vbo.c820
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_winsys.h125
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv84_video.c797
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv84_video.h138
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c250
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv84_video_vp.c552
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv98_video.c297
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv98_video.h48
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c159
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c143
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv98_video_vp.c202
40 files changed, 15555 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
new file mode 100644
index 00000000000..dfbef2c6a30
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
@@ -0,0 +1,416 @@
+#ifndef RNNDB_NV50_2D_XML
+#define RNNDB_NV50_2D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/nv50_2d.xml ( 11113 bytes, from 2011-07-09 13:43:58)
+- ./rnndb/copyright.xml ( 6452 bytes, from 2011-07-09 13:43:58)
+- ./rnndb/nv_object.xml ( 12912 bytes, from 2012-07-12 09:41:09)
+- ./rnndb/nvchipsets.xml ( 3736 bytes, from 2012-07-12 09:41:09)
+- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-07-09 13:43:58)
+- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-07-09 13:43:58)
+
+Copyright (C) 2006-2011 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Koƛcielnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+
+#define NV50_2D_DMA_NOTIFY 0x00000180
+
+#define NV50_2D_DMA_DST 0x00000184
+
+#define NV50_2D_DMA_SRC 0x00000188
+
+#define NV50_2D_DMA_COND 0x0000018c
+
+#define NV50_2D_DST_FORMAT 0x00000200
+
+#define NV50_2D_DST_LINEAR 0x00000204
+
+#define NV50_2D_DST_TILE_MODE 0x00000208
+
+#define NV50_2D_DST_DEPTH 0x0000020c
+
+#define NV50_2D_DST_LAYER 0x00000210
+
+#define NV50_2D_DST_PITCH 0x00000214
+
+#define NV50_2D_DST_WIDTH 0x00000218
+
+#define NV50_2D_DST_HEIGHT 0x0000021c
+
+#define NV50_2D_DST_ADDRESS_HIGH 0x00000220
+
+#define NV50_2D_DST_ADDRESS_LOW 0x00000224
+
+#define NV50_2D_UNK228 0x00000228
+
+#define NVC0_2D_UNK228 0x00000228
+
+#define NV50_2D_SRC_FORMAT 0x00000230
+
+#define NV50_2D_SRC_LINEAR 0x00000234
+
+#define NV50_2D_SRC_TILE_MODE 0x00000238
+
+#define NV50_2D_SRC_DEPTH 0x0000023c
+
+#define NV50_2D_SRC_LAYER 0x00000240
+
+#define NVC0_2D_UNK0240 0x00000240
+
+#define NV50_2D_SRC_PITCH 0x00000244
+#define NV50_2D_SRC_PITCH__MAX 0x00040000
+
+#define NV50_2D_SRC_WIDTH 0x00000248
+#define NV50_2D_SRC_WIDTH__MAX 0x00010000
+
+#define NV50_2D_SRC_HEIGHT 0x0000024c
+#define NV50_2D_SRC_HEIGHT__MAX 0x00010000
+
+#define NV50_2D_SRC_ADDRESS_HIGH 0x00000250
+
+#define NV50_2D_SRC_ADDRESS_LOW 0x00000254
+
+#define NV50_2D_UNK258 0x00000258
+
+#define NV50_2D_UNK260 0x00000260
+
+#define NV50_2D_COND_ADDRESS_HIGH 0x00000264
+
+#define NV50_2D_COND_ADDRESS_LOW 0x00000268
+
+#define NV50_2D_COND_MODE 0x0000026c
+#define NV50_2D_COND_MODE_NEVER 0x00000000
+#define NV50_2D_COND_MODE_ALWAYS 0x00000001
+#define NV50_2D_COND_MODE_RES_NON_ZERO 0x00000002
+#define NV50_2D_COND_MODE_EQUAL 0x00000003
+#define NV50_2D_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NV50_2D_CLIP_X 0x00000280
+
+#define NV50_2D_CLIP_Y 0x00000284
+
+#define NV50_2D_CLIP_W 0x00000288
+
+#define NV50_2D_CLIP_H 0x0000028c
+
+#define NV50_2D_CLIP_ENABLE 0x00000290
+
+#define NV50_2D_COLOR_KEY_FORMAT 0x00000294
+#define NV50_2D_COLOR_KEY_FORMAT_16BPP 0x00000000
+#define NV50_2D_COLOR_KEY_FORMAT_15BPP 0x00000001
+#define NV50_2D_COLOR_KEY_FORMAT_24BPP 0x00000002
+#define NV50_2D_COLOR_KEY_FORMAT_30BPP 0x00000003
+#define NV50_2D_COLOR_KEY_FORMAT_8BPP 0x00000004
+#define NV50_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005
+#define NV50_2D_COLOR_KEY_FORMAT_32BPP 0x00000006
+
+#define NV50_2D_COLOR_KEY 0x00000298
+
+#define NV50_2D_COLOR_KEY_ENABLE 0x0000029c
+
+#define NV50_2D_ROP 0x000002a0
+
+#define NV50_2D_BETA1 0x000002a4
+#define NV50_2D_BETA1_BETA1__MASK 0x7f800000
+#define NV50_2D_BETA1_BETA1__SHIFT 23
+
+#define NV50_2D_BETA4 0x000002a8
+#define NV50_2D_BETA4_B__MASK 0x000000ff
+#define NV50_2D_BETA4_B__SHIFT 0
+#define NV50_2D_BETA4_G__MASK 0x0000ff00
+#define NV50_2D_BETA4_G__SHIFT 8
+#define NV50_2D_BETA4_R__MASK 0x00ff0000
+#define NV50_2D_BETA4_R__SHIFT 16
+#define NV50_2D_BETA4_A__MASK 0xff000000
+#define NV50_2D_BETA4_A__SHIFT 24
+
+#define NV50_2D_OPERATION 0x000002ac
+#define NV50_2D_OPERATION_SRCCOPY_AND 0x00000000
+#define NV50_2D_OPERATION_ROP_AND 0x00000001
+#define NV50_2D_OPERATION_BLEND 0x00000002
+#define NV50_2D_OPERATION_SRCCOPY 0x00000003
+#define NV50_2D_OPERATION_ROP 0x00000004
+#define NV50_2D_OPERATION_SRCCOPY_PREMULT 0x00000005
+#define NV50_2D_OPERATION_BLEND_PREMULT 0x00000006
+
+#define NV50_2D_PATTERN_OFFSET 0x000002b0
+#define NV50_2D_PATTERN_OFFSET_X__MASK 0x0000003f
+#define NV50_2D_PATTERN_OFFSET_X__SHIFT 0
+#define NV50_2D_PATTERN_OFFSET_Y__MASK 0x00003f00
+#define NV50_2D_PATTERN_OFFSET_Y__SHIFT 8
+
+#define NV50_2D_PATTERN_SELECT 0x000002b4
+#define NV50_2D_PATTERN_SELECT_MONO_8X8 0x00000000
+#define NV50_2D_PATTERN_SELECT_MONO_64X1 0x00000001
+#define NV50_2D_PATTERN_SELECT_MONO_1X64 0x00000002
+#define NV50_2D_PATTERN_SELECT_COLOR 0x00000003
+
+#define NVC0_2D_UNK2DC 0x000002dc
+
+#define NVC0_2D_UNK2E0 0x000002e0
+
+#define NV50_2D_PATTERN_COLOR_FORMAT 0x000002e8
+#define NV50_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000
+#define NV50_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001
+#define NV50_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002
+#define NV50_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003
+#define NV50_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004
+#define NV50_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005
+#define NV50_2D_PATTERN_COLOR_FORMAT_UNK6 0x00000006
+
+#define NV50_2D_PATTERN_MONO_FORMAT 0x000002ec
+#define NV50_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000
+#define NV50_2D_PATTERN_MONO_FORMAT_LE 0x00000001
+
+#define NV50_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0))
+#define NV50_2D_PATTERN_COLOR__ESIZE 0x00000004
+#define NV50_2D_PATTERN_COLOR__LEN 0x00000002
+
+#define NV50_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0))
+#define NV50_2D_PATTERN_BITMAP__ESIZE 0x00000004
+#define NV50_2D_PATTERN_BITMAP__LEN 0x00000002
+
+#define NV50_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0))
+#define NV50_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004
+#define NV50_2D_PATTERN_X8R8G8B8__LEN 0x00000040
+#define NV50_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff
+#define NV50_2D_PATTERN_X8R8G8B8_B__SHIFT 0
+#define NV50_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00
+#define NV50_2D_PATTERN_X8R8G8B8_G__SHIFT 8
+#define NV50_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000
+#define NV50_2D_PATTERN_X8R8G8B8_R__SHIFT 16
+
+#define NV50_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0))
+#define NV50_2D_PATTERN_R5G6B5__ESIZE 0x00000004
+#define NV50_2D_PATTERN_R5G6B5__LEN 0x00000020
+#define NV50_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f
+#define NV50_2D_PATTERN_R5G6B5_B0__SHIFT 0
+#define NV50_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0
+#define NV50_2D_PATTERN_R5G6B5_G0__SHIFT 5
+#define NV50_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800
+#define NV50_2D_PATTERN_R5G6B5_R0__SHIFT 11
+#define NV50_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000
+#define NV50_2D_PATTERN_R5G6B5_B1__SHIFT 16
+#define NV50_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000
+#define NV50_2D_PATTERN_R5G6B5_G1__SHIFT 21
+#define NV50_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000
+#define NV50_2D_PATTERN_R5G6B5_R1__SHIFT 27
+
+#define NV50_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0))
+#define NV50_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004
+#define NV50_2D_PATTERN_X1R5G5B5__LEN 0x00000020
+#define NV50_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f
+#define NV50_2D_PATTERN_X1R5G5B5_B0__SHIFT 0
+#define NV50_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0
+#define NV50_2D_PATTERN_X1R5G5B5_G0__SHIFT 5
+#define NV50_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00
+#define NV50_2D_PATTERN_X1R5G5B5_R0__SHIFT 10
+#define NV50_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000
+#define NV50_2D_PATTERN_X1R5G5B5_B1__SHIFT 16
+#define NV50_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000
+#define NV50_2D_PATTERN_X1R5G5B5_G1__SHIFT 21
+#define NV50_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000
+#define NV50_2D_PATTERN_X1R5G5B5_R1__SHIFT 26
+
+#define NV50_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0))
+#define NV50_2D_PATTERN_Y8__ESIZE 0x00000004
+#define NV50_2D_PATTERN_Y8__LEN 0x00000010
+#define NV50_2D_PATTERN_Y8_Y0__MASK 0x000000ff
+#define NV50_2D_PATTERN_Y8_Y0__SHIFT 0
+#define NV50_2D_PATTERN_Y8_Y1__MASK 0x0000ff00
+#define NV50_2D_PATTERN_Y8_Y1__SHIFT 8
+#define NV50_2D_PATTERN_Y8_Y2__MASK 0x00ff0000
+#define NV50_2D_PATTERN_Y8_Y2__SHIFT 16
+#define NV50_2D_PATTERN_Y8_Y3__MASK 0xff000000
+#define NV50_2D_PATTERN_Y8_Y3__SHIFT 24
+
+#define NVC0_2D_DRAW_COLOR_LONG(i0) (0x00000540 + 0x4*(i0))
+#define NVC0_2D_DRAW_COLOR_LONG__ESIZE 0x00000004
+#define NVC0_2D_DRAW_COLOR_LONG__LEN 0x00000004
+
+#define NV50_2D_DRAW_SHAPE 0x00000580
+#define NV50_2D_DRAW_SHAPE_POINTS 0x00000000
+#define NV50_2D_DRAW_SHAPE_LINES 0x00000001
+#define NV50_2D_DRAW_SHAPE_LINE_STRIP 0x00000002
+#define NV50_2D_DRAW_SHAPE_TRIANGLES 0x00000003
+#define NV50_2D_DRAW_SHAPE_RECTANGLES 0x00000004
+
+#define NV50_2D_DRAW_COLOR_FORMAT 0x00000584
+
+#define NV50_2D_DRAW_COLOR 0x00000588
+
+#define NV50_2D_UNK58C 0x0000058c
+#define NV50_2D_UNK58C_0 0x00000001
+#define NV50_2D_UNK58C_1 0x00000010
+#define NV50_2D_UNK58C_2 0x00000100
+#define NV50_2D_UNK58C_3 0x00001000
+
+#define NV50_2D_DRAW_POINT16 0x000005e0
+#define NV50_2D_DRAW_POINT16_X__MASK 0x0000ffff
+#define NV50_2D_DRAW_POINT16_X__SHIFT 0
+#define NV50_2D_DRAW_POINT16_Y__MASK 0xffff0000
+#define NV50_2D_DRAW_POINT16_Y__SHIFT 16
+
+#define NV50_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0))
+#define NV50_2D_DRAW_POINT32_X__ESIZE 0x00000008
+#define NV50_2D_DRAW_POINT32_X__LEN 0x00000040
+
+#define NV50_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0))
+#define NV50_2D_DRAW_POINT32_Y__ESIZE 0x00000008
+#define NV50_2D_DRAW_POINT32_Y__LEN 0x00000040
+
+#define NV50_2D_SIFC_BITMAP_ENABLE 0x00000800
+
+#define NV50_2D_SIFC_FORMAT 0x00000804
+
+#define NV50_2D_SIFC_BITMAP_FORMAT 0x00000808
+#define NV50_2D_SIFC_BITMAP_FORMAT_I1 0x00000000
+#define NV50_2D_SIFC_BITMAP_FORMAT_I4 0x00000001
+#define NV50_2D_SIFC_BITMAP_FORMAT_I8 0x00000002
+
+#define NV50_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c
+
+#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810
+#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000
+#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001
+#define NV50_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002
+
+#define NV50_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814
+
+#define NV50_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818
+
+#define NV50_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c
+
+#define NV50_2D_SIFC_WIDTH 0x00000838
+
+#define NV50_2D_SIFC_HEIGHT 0x0000083c
+
+#define NV50_2D_SIFC_DX_DU_FRACT 0x00000840
+
+#define NV50_2D_SIFC_DX_DU_INT 0x00000844
+
+#define NV50_2D_SIFC_DY_DV_FRACT 0x00000848
+
+#define NV50_2D_SIFC_DY_DV_INT 0x0000084c
+
+#define NV50_2D_SIFC_DST_X_FRACT 0x00000850
+
+#define NV50_2D_SIFC_DST_X_INT 0x00000854
+
+#define NV50_2D_SIFC_DST_Y_FRACT 0x00000858
+
+#define NV50_2D_SIFC_DST_Y_INT 0x0000085c
+
+#define NV50_2D_SIFC_DATA 0x00000860
+
+#define NV50_2D_UNK0870 0x00000870
+
+#define NV50_2D_UNK0880 0x00000880
+
+#define NV50_2D_UNK0884 0x00000884
+
+#define NV50_2D_UNK0888 0x00000888
+
+#define NV50_2D_BLIT_CONTROL 0x0000088c
+#define NV50_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001
+#define NV50_2D_BLIT_CONTROL_ORIGIN__SHIFT 0
+#define NV50_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000
+#define NV50_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001
+#define NV50_2D_BLIT_CONTROL_FILTER__MASK 0x00000010
+#define NV50_2D_BLIT_CONTROL_FILTER__SHIFT 4
+#define NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000
+#define NV50_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010
+
+#define NV50_2D_BLIT_DST_X 0x000008b0
+
+#define NV50_2D_BLIT_DST_Y 0x000008b4
+
+#define NV50_2D_BLIT_DST_W 0x000008b8
+
+#define NV50_2D_BLIT_DST_H 0x000008bc
+
+#define NV50_2D_BLIT_DU_DX_FRACT 0x000008c0
+
+#define NV50_2D_BLIT_DU_DX_INT 0x000008c4
+
+#define NV50_2D_BLIT_DV_DY_FRACT 0x000008c8
+
+#define NV50_2D_BLIT_DV_DY_INT 0x000008cc
+
+#define NV50_2D_BLIT_SRC_X_FRACT 0x000008d0
+
+#define NV50_2D_BLIT_SRC_X_INT 0x000008d4
+
+#define NV50_2D_BLIT_SRC_Y_FRACT 0x000008d8
+
+#define NV50_2D_BLIT_SRC_Y_INT 0x000008dc
+
+#define NVC0_2D_FIRMWARE(i0) (0x000008e0 + 0x4*(i0))
+#define NVC0_2D_FIRMWARE__ESIZE 0x00000004
+#define NVC0_2D_FIRMWARE__LEN 0x00000020
+
+
+#endif /* RNNDB_NV50_2D_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h
new file mode 100644
index 00000000000..9dff8b2dd13
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_3d.xml.h
@@ -0,0 +1,2110 @@
+#ifndef RNNDB_NV50_3D_XML
+#define RNNDB_NV50_3D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/nv50_3d.xml ( 65226 bytes, from 2012-01-28 13:46:30)
+- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nvchipsets.xml ( 3617 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nv_3ddefs.xml ( 16394 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nv_object.xml ( 12672 bytes, from 2011-08-11 18:25:12)
+
+Copyright (C) 2006-2012 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Koƛcielnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NV50_3D_DMA_NOTIFY 0x00000180
+
+#define NV50_3D_DMA_ZETA 0x00000184
+
+#define NV50_3D_DMA_QUERY 0x00000188
+
+#define NV50_3D_DMA_VTXBUF 0x0000018c
+
+#define NV50_3D_DMA_LOCAL 0x00000190
+
+#define NV50_3D_DMA_STACK 0x00000194
+
+#define NV50_3D_DMA_CODE_CB 0x00000198
+
+#define NV50_3D_DMA_TSC 0x0000019c
+
+#define NV50_3D_DMA_TIC 0x000001a0
+
+#define NV50_3D_DMA_TEXTURE 0x000001a4
+
+#define NV50_3D_DMA_STRMOUT 0x000001a8
+
+#define NV50_3D_DMA_CLIPID 0x000001ac
+
+#define NV50_3D_DMA_COLOR(i0) (0x000001c0 + 0x4*(i0))
+#define NV50_3D_DMA_COLOR__ESIZE 0x00000004
+#define NV50_3D_DMA_COLOR__LEN 0x00000008
+
+#define NV50_3D_RT(i0) (0x00000200 + 0x20*(i0))
+#define NV50_3D_RT__ESIZE 0x00000020
+#define NV50_3D_RT__LEN 0x00000008
+
+#define NV50_3D_RT_ADDRESS_HIGH(i0) (0x00000200 + 0x20*(i0))
+
+#define NV50_3D_RT_ADDRESS_LOW(i0) (0x00000204 + 0x20*(i0))
+
+#define NV50_3D_RT_FORMAT(i0) (0x00000208 + 0x20*(i0))
+
+#define NV50_3D_RT_TILE_MODE(i0) (0x0000020c + 0x20*(i0))
+#define NV50_3D_RT_TILE_MODE_X__MASK 0x0000000f
+#define NV50_3D_RT_TILE_MODE_X__SHIFT 0
+#define NV50_3D_RT_TILE_MODE_Y__MASK 0x000000f0
+#define NV50_3D_RT_TILE_MODE_Y__SHIFT 4
+#define NV50_3D_RT_TILE_MODE_Z__MASK 0x00000f00
+#define NV50_3D_RT_TILE_MODE_Z__SHIFT 8
+
+#define NV50_3D_RT_LAYER_STRIDE(i0) (0x00000210 + 0x20*(i0))
+#define NV50_3D_RT_LAYER_STRIDE__SHR 2
+
+#define NV50_3D_RT_UNK14(i0) (0x00000214 + 0x20*(i0))
+
+#define NV50_3D_VTX_ATTR_1F(i0) (0x00000300 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_1F__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_1F__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_2H(i0) (0x00000340 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_2H__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_2H__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_2H_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_2H_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_2H_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_2H_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_2F_X(i0) (0x00000380 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_2F_X__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_2F_X__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_2F_Y(i0) (0x00000384 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_2F_Y__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_2F_Y__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_3F_X(i0) (0x00000400 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_3F_X__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_3F_X__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_3F_Y(i0) (0x00000404 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_3F_Y__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_3F_Y__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_3F_Z(i0) (0x00000408 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_3F_Z__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_3F_Z__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4F_X(i0) (0x00000500 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_4F_X__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_4F_X__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4F_Y(i0) (0x00000504 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_4F_Y__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_4F_Y__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4F_Z(i0) (0x00000508 + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_4F_Z__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_4F_Z__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4F_W(i0) (0x0000050c + 0x10*(i0))
+#define NV50_3D_VTX_ATTR_4F_W__ESIZE 0x00000010
+#define NV50_3D_VTX_ATTR_4F_W__LEN 0x00000010
+
+#define NV50_3D_VTX_ATTR_4H_0(i0) (0x00000600 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4H_0__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4H_0__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4H_0_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4H_0_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4H_0_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4H_0_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4H_1(i0) (0x00000604 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4H_1__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4H_1__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4H_1_Z__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4H_1_Z__SHIFT 0
+#define NV50_3D_VTX_ATTR_4H_1_W__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4H_1_W__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_2I(i0) (0x00000680 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_2I__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_2I__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_2I_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_2I_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_2I_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_2I_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_2NI(i0) (0x000006c0 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_2NI__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_2NI__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_2NI_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_2NI_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_2NI_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_2NI_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4I_0(i0) (0x00000700 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4I_0__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4I_0__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4I_0_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4I_0_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4I_0_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4I_0_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4I_1(i0) (0x00000704 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4I_1__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4I_1__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4I_1_Z__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4I_1_Z__SHIFT 0
+#define NV50_3D_VTX_ATTR_4I_1_W__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4I_1_W__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4NI_0(i0) (0x00000780 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4NI_0__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4NI_0__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4NI_0_X__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4NI_0_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4NI_0_Y__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4NI_0_Y__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4NI_1(i0) (0x00000784 + 0x8*(i0))
+#define NV50_3D_VTX_ATTR_4NI_1__ESIZE 0x00000008
+#define NV50_3D_VTX_ATTR_4NI_1__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4NI_1_Z__MASK 0x0000ffff
+#define NV50_3D_VTX_ATTR_4NI_1_Z__SHIFT 0
+#define NV50_3D_VTX_ATTR_4NI_1_W__MASK 0xffff0000
+#define NV50_3D_VTX_ATTR_4NI_1_W__SHIFT 16
+
+#define NV50_3D_VTX_ATTR_4UB(i0) (0x00000800 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_4UB__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_4UB__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4UB_X__MASK 0x000000ff
+#define NV50_3D_VTX_ATTR_4UB_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4UB_Y__MASK 0x0000ff00
+#define NV50_3D_VTX_ATTR_4UB_Y__SHIFT 8
+#define NV50_3D_VTX_ATTR_4UB_Z__MASK 0x00ff0000
+#define NV50_3D_VTX_ATTR_4UB_Z__SHIFT 16
+#define NV50_3D_VTX_ATTR_4UB_W__MASK 0xff000000
+#define NV50_3D_VTX_ATTR_4UB_W__SHIFT 24
+
+#define NV50_3D_VTX_ATTR_4B(i0) (0x00000840 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_4B__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_4B__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4B_X__MASK 0x000000ff
+#define NV50_3D_VTX_ATTR_4B_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4B_Y__MASK 0x0000ff00
+#define NV50_3D_VTX_ATTR_4B_Y__SHIFT 8
+#define NV50_3D_VTX_ATTR_4B_Z__MASK 0x00ff0000
+#define NV50_3D_VTX_ATTR_4B_Z__SHIFT 16
+#define NV50_3D_VTX_ATTR_4B_W__MASK 0xff000000
+#define NV50_3D_VTX_ATTR_4B_W__SHIFT 24
+
+#define NV50_3D_VTX_ATTR_4NUB(i0) (0x00000880 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_4NUB__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_4NUB__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4NUB_X__MASK 0x000000ff
+#define NV50_3D_VTX_ATTR_4NUB_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4NUB_Y__MASK 0x0000ff00
+#define NV50_3D_VTX_ATTR_4NUB_Y__SHIFT 8
+#define NV50_3D_VTX_ATTR_4NUB_Z__MASK 0x00ff0000
+#define NV50_3D_VTX_ATTR_4NUB_Z__SHIFT 16
+#define NV50_3D_VTX_ATTR_4NUB_W__MASK 0xff000000
+#define NV50_3D_VTX_ATTR_4NUB_W__SHIFT 24
+
+#define NV50_3D_VTX_ATTR_4NB(i0) (0x000008c0 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_4NB__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_4NB__LEN 0x00000010
+#define NV50_3D_VTX_ATTR_4NB_X__MASK 0x000000ff
+#define NV50_3D_VTX_ATTR_4NB_X__SHIFT 0
+#define NV50_3D_VTX_ATTR_4NB_Y__MASK 0x0000ff00
+#define NV50_3D_VTX_ATTR_4NB_Y__SHIFT 8
+#define NV50_3D_VTX_ATTR_4NB_Z__MASK 0x00ff0000
+#define NV50_3D_VTX_ATTR_4NB_Z__SHIFT 16
+#define NV50_3D_VTX_ATTR_4NB_W__MASK 0xff000000
+#define NV50_3D_VTX_ATTR_4NB_W__SHIFT 24
+
+#define NV50_3D_VERTEX_ARRAY_FETCH(i0) (0x00000900 + 0x10*(i0))
+#define NV50_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010
+#define NV50_3D_VERTEX_ARRAY_FETCH__LEN 0x00000010
+#define NV50_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff
+#define NV50_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0
+#define NV50_3D_VERTEX_ARRAY_FETCH_ENABLE 0x20000000
+
+#define NV50_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00000904 + 0x10*(i0))
+#define NV50_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010
+#define NV50_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000010
+
+#define NV50_3D_VERTEX_ARRAY_START_LOW(i0) (0x00000908 + 0x10*(i0))
+#define NV50_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010
+#define NV50_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000010
+
+#define NV50_3D_VERTEX_ARRAY_DIVISOR(i0) (0x0000090c + 0x10*(i0))
+#define NV50_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010
+#define NV50_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_SCALE_X__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_SCALE_Y__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_SCALE_Z__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0))
+#define NV50_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0))
+#define NV50_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020
+#define NV50_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010
+
+#define NV50_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0))
+#define NV50_3D_VIEWPORT_HORIZ__ESIZE 0x00000010
+#define NV50_3D_VIEWPORT_HORIZ__LEN 0x00000010
+#define NV50_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff
+#define NV50_3D_VIEWPORT_HORIZ_X__SHIFT 0
+#define NV50_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000
+#define NV50_3D_VIEWPORT_HORIZ_W__SHIFT 16
+
+#define NV50_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0))
+#define NV50_3D_VIEWPORT_VERT__ESIZE 0x00000010
+#define NV50_3D_VIEWPORT_VERT__LEN 0x00000010
+#define NV50_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff
+#define NV50_3D_VIEWPORT_VERT_Y__SHIFT 0
+#define NV50_3D_VIEWPORT_VERT_H__MASK 0xffff0000
+#define NV50_3D_VIEWPORT_VERT_H__SHIFT 16
+
+#define NV50_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0))
+#define NV50_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010
+#define NV50_3D_DEPTH_RANGE_NEAR__LEN 0x00000010
+
+#define NV50_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0))
+#define NV50_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010
+#define NV50_3D_DEPTH_RANGE_FAR__LEN 0x00000010
+
+#define NV50_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0))
+#define NV50_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008
+#define NV50_3D_CLIP_RECT_HORIZ__LEN 0x00000008
+#define NV50_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff
+#define NV50_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0
+#define NV50_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000
+#define NV50_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16
+
+#define NV50_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0))
+#define NV50_3D_CLIP_RECT_VERT__ESIZE 0x00000008
+#define NV50_3D_CLIP_RECT_VERT__LEN 0x00000008
+#define NV50_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff
+#define NV50_3D_CLIP_RECT_VERT_MIN__SHIFT 0
+#define NV50_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000
+#define NV50_3D_CLIP_RECT_VERT_MAX__SHIFT 16
+
+#define NV50_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0))
+#define NV50_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008
+#define NV50_3D_CLIPID_REGION_HORIZ__LEN 0x00000004
+#define NV50_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff
+#define NV50_3D_CLIPID_REGION_HORIZ_X__SHIFT 0
+#define NV50_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000
+#define NV50_3D_CLIPID_REGION_HORIZ_W__SHIFT 16
+
+#define NV50_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0))
+#define NV50_3D_CLIPID_REGION_VERT__ESIZE 0x00000008
+#define NV50_3D_CLIPID_REGION_VERT__LEN 0x00000004
+#define NV50_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff
+#define NV50_3D_CLIPID_REGION_VERT_Y__SHIFT 0
+#define NV50_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000
+#define NV50_3D_CLIPID_REGION_VERT_H__SHIFT 16
+
+#define NV50_3D_UNK0D60 0x00000d60
+
+#define NV50_3D_UNK0D64 0x00000d64
+
+#define NV50_3D_COUNTER_ENABLE 0x00000d68
+#define NV50_3D_COUNTER_ENABLE_VFETCH_VERTICES 0x00000001
+#define NV50_3D_COUNTER_ENABLE_VFETCH_PRIMITIVES 0x00000002
+#define NV50_3D_COUNTER_ENABLE_VP_LAUNCHES 0x00000004
+#define NV50_3D_COUNTER_ENABLE_GP_LAUNCHES 0x00000008
+#define NV50_3D_COUNTER_ENABLE_GP_PRIMITIVES_OUT 0x00000010
+#define NV50_3D_COUNTER_ENABLE_TRANSFORM_FEEDBACK 0x00000020
+#define NV50_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000040
+#define NV50_3D_COUNTER_ENABLE_RAST_PRIMITIVES_PRECLIP 0x00000080
+#define NV50_3D_COUNTER_ENABLE_RAST_PRIMITIVES_POSTCLIP 0x00000100
+#define NV50_3D_COUNTER_ENABLE_FP_PIXELS 0x00000200
+#define NV84_3D_COUNTER_ENABLE_UNK0A 0x00000400
+
+#define NV50_3D_UNK0D6C(i0) (0x00000d6c + 0x4*(i0))
+#define NV50_3D_UNK0D6C__ESIZE 0x00000004
+#define NV50_3D_UNK0D6C__LEN 0x00000002
+#define NV50_3D_UNK0D6C_X__MASK 0x0000ffff
+#define NV50_3D_UNK0D6C_X__SHIFT 0
+#define NV50_3D_UNK0D6C_Y__MASK 0xffff0000
+#define NV50_3D_UNK0D6C_Y__SHIFT 16
+
+#define NV50_3D_VERTEX_BUFFER_FIRST 0x00000d74
+
+#define NV50_3D_VERTEX_BUFFER_COUNT 0x00000d78
+
+#define NV50_3D_UNK0D7C 0x00000d7c
+
+#define NV50_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0))
+#define NV50_3D_CLEAR_COLOR__ESIZE 0x00000004
+#define NV50_3D_CLEAR_COLOR__LEN 0x00000004
+
+#define NV50_3D_CLEAR_DEPTH 0x00000d90
+
+#define NV50_3D_STACK_ADDRESS_HIGH 0x00000d94
+
+#define NV50_3D_STACK_ADDRESS_LOW 0x00000d98
+
+#define NV50_3D_STACK_SIZE_LOG 0x00000d9c
+
+#define NV50_3D_CLEAR_STENCIL 0x00000da0
+
+#define NV50_3D_STRMOUT_PARAMS_LATCH 0x00000da4
+
+#define NV50_3D_STRMOUT_PRIMITIVE_LIMIT 0x00000da8
+
+#define NV50_3D_POLYGON_MODE_FRONT 0x00000dac
+#define NV50_3D_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NV50_3D_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NV50_3D_POLYGON_MODE_FRONT_FILL 0x00001b02
+
+#define NV50_3D_POLYGON_MODE_BACK 0x00000db0
+#define NV50_3D_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NV50_3D_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NV50_3D_POLYGON_MODE_BACK_FILL 0x00001b02
+
+#define NV50_3D_POLYGON_SMOOTH_ENABLE 0x00000db4
+
+#define NV50_3D_UNK0DB8 0x00000db8
+
+#define NV50_3D_ZCULL_UNK0DBC 0x00000dbc
+#define NV50_3D_ZCULL_UNK0DBC_UNK0 0x00000001
+#define NV50_3D_ZCULL_UNK0DBC_UNK16__MASK 0x00030000
+#define NV50_3D_ZCULL_UNK0DBC_UNK16__SHIFT 16
+
+#define NV50_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0
+
+#define NV50_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4
+
+#define NV50_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8
+
+#define NV50_3D_UNK0DCC 0x00000dcc
+
+#define NV50_3D_VTX_ATTR_MASK_UNK0DD0(i0) (0x00000dd0 + 0x4*(i0))
+#define NV50_3D_VTX_ATTR_MASK_UNK0DD0__ESIZE 0x00000004
+#define NV50_3D_VTX_ATTR_MASK_UNK0DD0__LEN 0x00000002
+
+#define NV50_3D_ZCULL_UNK0DD8 0x00000dd8
+#define NV50_3D_ZCULL_UNK0DD8_UNK0__MASK 0x00000007
+#define NV50_3D_ZCULL_UNK0DD8_UNK0__SHIFT 0
+#define NVA3_3D_ZCULL_UNK0DD8_UNK9 0x00000200
+#define NV50_3D_ZCULL_UNK0DD8_UNK16__MASK 0xffff0000
+#define NV50_3D_ZCULL_UNK0DD8_UNK16__SHIFT 16
+
+#define NV50_3D_UNK0DDC 0x00000ddc
+
+#define NV50_3D_UNK0DE0 0x00000de0
+
+#define NV50_3D_WATCHDOG_TIMER 0x00000de4
+
+#define NV50_3D_UNK0DE8 0x00000de8
+
+#define NV50_3D_UNK0DEC 0x00000dec
+
+#define NV50_3D_UNK0DF0 0x00000df0
+#define NV50_3D_UNK0DF0_UNK0 0x00000001
+#define NV50_3D_UNK0DF0_UNK1__MASK 0x00000ff0
+#define NV50_3D_UNK0DF0_UNK1__SHIFT 4
+
+#define NV50_3D_UNK0DF4 0x00000df4
+
+#define NV50_3D_WINDOW_OFFSET_X 0x00000df8
+
+#define NV50_3D_WINDOW_OFFSET_Y 0x00000dfc
+
+#define NV50_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0))
+#define NV50_3D_SCISSOR_ENABLE__ESIZE 0x00000010
+#define NV50_3D_SCISSOR_ENABLE__LEN 0x00000010
+
+#define NV50_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0))
+#define NV50_3D_SCISSOR_HORIZ__ESIZE 0x00000010
+#define NV50_3D_SCISSOR_HORIZ__LEN 0x00000010
+#define NV50_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff
+#define NV50_3D_SCISSOR_HORIZ_MIN__SHIFT 0
+#define NV50_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000
+#define NV50_3D_SCISSOR_HORIZ_MAX__SHIFT 16
+
+#define NV50_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0))
+#define NV50_3D_SCISSOR_VERT__ESIZE 0x00000010
+#define NV50_3D_SCISSOR_VERT__LEN 0x00000010
+#define NV50_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff
+#define NV50_3D_SCISSOR_VERT_MIN__SHIFT 0
+#define NV50_3D_SCISSOR_VERT_MAX__MASK 0xffff0000
+#define NV50_3D_SCISSOR_VERT_MAX__SHIFT 16
+
+#define NV50_3D_CB_ADDR 0x00000f00
+#define NV50_3D_CB_ADDR_ID__MASK 0x003fff00
+#define NV50_3D_CB_ADDR_ID__SHIFT 8
+#define NV50_3D_CB_ADDR_BUFFER__MASK 0x0000007f
+#define NV50_3D_CB_ADDR_BUFFER__SHIFT 0
+
+#define NV50_3D_CB_DATA(i0) (0x00000f04 + 0x4*(i0))
+#define NV50_3D_CB_DATA__ESIZE 0x00000004
+#define NV50_3D_CB_DATA__LEN 0x00000010
+
+#define NV50_3D_LOCAL_WARPS_LOG_ALLOC 0x00000f44
+
+#define NV50_3D_LOCAL_WARPS_NO_CLAMP 0x00000f48
+
+#define NV50_3D_STACK_WARPS_LOG_ALLOC 0x00000f4c
+
+#define NV50_3D_STACK_WARPS_NO_CLAMP 0x00000f50
+
+#define NV50_3D_STENCIL_BACK_FUNC_REF 0x00000f54
+
+#define NV50_3D_STENCIL_BACK_MASK 0x00000f58
+
+#define NV50_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c
+
+#define NV50_3D_UNK0F60(i0) (0x00000f60 + 0x4*(i0))
+#define NV50_3D_UNK0F60__ESIZE 0x00000004
+#define NV50_3D_UNK0F60__LEN 0x00000004
+
+#define NV50_3D_GP_ADDRESS_HIGH 0x00000f70
+
+#define NV50_3D_GP_ADDRESS_LOW 0x00000f74
+
+#define NV50_3D_UNK0F78 0x00000f78
+
+#define NV50_3D_VP_ADDRESS_HIGH 0x00000f7c
+
+#define NV50_3D_VP_ADDRESS_LOW 0x00000f80
+
+#define NV50_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84
+
+#define NV50_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88
+
+#define NV50_3D_UNK0F8C 0x00000f8c
+
+#define NV50_3D_COLOR_MASK_COMMON 0x00000f90
+
+#define NV50_3D_UNK0F94 0x00000f94
+
+#define NV50_3D_UNK0F98 0x00000f98
+
+#define NV50_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0))
+#define NV50_3D_DEPTH_BOUNDS__ESIZE 0x00000004
+#define NV50_3D_DEPTH_BOUNDS__LEN 0x00000002
+
+#define NV50_3D_FP_ADDRESS_HIGH 0x00000fa4
+
+#define NV50_3D_FP_ADDRESS_LOW 0x00000fa8
+
+#define NV50_3D_UNK0FAC 0x00000fac
+#define NV50_3D_UNK0FAC_UNK0 0x00000001
+#define NVA0_3D_UNK0FAC_UNK2 0x00000002
+#define NV50_3D_UNK0FAC_UNK1__MASK 0x000ffff0
+#define NV50_3D_UNK0FAC_UNK1__SHIFT 4
+
+#define NV50_3D_UNK0FB0 0x00000fb0
+
+#define NV50_3D_UNK0FB4 0x00000fb4
+
+#define NV50_3D_UNK0FB8 0x00000fb8
+
+#define NV50_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0))
+#define NV50_3D_MSAA_MASK__ESIZE 0x00000004
+#define NV50_3D_MSAA_MASK__LEN 0x00000004
+
+#define NV50_3D_CLIPID_ADDRESS_HIGH 0x00000fcc
+
+#define NV50_3D_CLIPID_ADDRESS_LOW 0x00000fd0
+
+#define NV50_3D_SEMANTIC_VIEWPORT 0x00000fd4
+#define NV50_3D_SEMANTIC_VIEWPORT_VIEWPORT_ID__MASK 0x000000ff
+#define NV50_3D_SEMANTIC_VIEWPORT_VIEWPORT_ID__SHIFT 0
+
+#define NV50_3D_UNK0FD8 0x00000fd8
+#define NV50_3D_UNK0FD8_UNK0 0x00000001
+#define NV50_3D_UNK0FD8_UNK1 0x00000010
+
+#define NV50_3D_UNK0FDC 0x00000fdc
+
+#define NV50_3D_ZETA_ADDRESS_HIGH 0x00000fe0
+
+#define NV50_3D_ZETA_ADDRESS_LOW 0x00000fe4
+
+#define NV50_3D_ZETA_FORMAT 0x00000fe8
+
+#define NV50_3D_ZETA_TILE_MODE 0x00000fec
+
+#define NV50_3D_ZETA_LAYER_STRIDE 0x00000ff0
+#define NV50_3D_ZETA_LAYER_STRIDE__SHR 2
+
+#define NV50_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4
+#define NV50_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000
+#define NV50_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16
+#define NV50_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff
+#define NV50_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0
+
+#define NV50_3D_SCREEN_SCISSOR_VERT 0x00000ff8
+#define NV50_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000
+#define NV50_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16
+#define NV50_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff
+#define NV50_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0
+
+#define NV50_3D_UNK0FFC 0x00000ffc
+
+#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001000 + 0x4*(i0))
+#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004
+#define NV50_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000010
+
+#define NV50_3D_UNK1040(i0) (0x00001040 + 0x4*(i0))
+#define NV50_3D_UNK1040__ESIZE 0x00000004
+#define NV50_3D_UNK1040__LEN 0x00000010
+
+#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001080 + 0x8*(i0))
+#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008
+#define NV50_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000010
+
+#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001084 + 0x8*(i0))
+#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008
+#define NV50_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000010
+
+#define NV50_3D_UNK1100 0x00001100
+
+#define NV84_3D_UNK1104 0x00001104
+#define NV84_3D_UNK1104_0__MASK 0x0000ffff
+#define NV84_3D_UNK1104_0__SHIFT 0
+#define NV84_3D_UNK1104_0__MAX 0x00002000
+#define NV84_3D_UNK1104_0__ALIGN 0x00000040
+#define NV84_3D_UNK1104_1__MASK 0xffff0000
+#define NV84_3D_UNK1104_1__SHIFT 16
+#define NV84_3D_UNK1104_1__MAX 0x00002000
+#define NV84_3D_UNK1104_1__ALIGN 0x00000040
+
+#define NV84_3D_UNK1108 0x00001108
+#define NV84_3D_UNK1108_0 0x00000001
+#define NV84_3D_UNK1108_1 0x00000010
+
+#define NV84_3D_UNK110C 0x0000110c
+
+#define NV84_3D_UNK1110 0x00001110
+
+#define NV84_3D_WRCACHE_FLUSH 0x00001114
+
+#define NV84_3D_VERTEX_ID_BASE 0x00001118
+
+#define NV84_3D_PRIMITIVE_ID 0x0000111c
+
+#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT(i0) (0x00001120 + 0x4*(i0))
+#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT__ESIZE 0x00000004
+#define NVA3_3D_VTX_ATTR_MASK_UNK0DD0_ALT__LEN 0x00000004
+
+#define NVA3_3D_VP_ATTR_EN_ALT(i0) (0x00001130 + 0x4*(i0))
+#define NVA3_3D_VP_ATTR_EN_ALT__ESIZE 0x00000004
+#define NVA3_3D_VP_ATTR_EN_ALT__LEN 0x00000004
+#define NVA3_3D_VP_ATTR_EN_ALT_7__MASK 0xf0000000
+#define NVA3_3D_VP_ATTR_EN_ALT_7__SHIFT 28
+#define NVA3_3D_VP_ATTR_EN_ALT_7_X 0x10000000
+#define NVA3_3D_VP_ATTR_EN_ALT_7_Y 0x20000000
+#define NVA3_3D_VP_ATTR_EN_ALT_7_Z 0x40000000
+#define NVA3_3D_VP_ATTR_EN_ALT_7_W 0x80000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6__MASK 0x0f000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6__SHIFT 24
+#define NVA3_3D_VP_ATTR_EN_ALT_6_X 0x01000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6_Y 0x02000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6_Z 0x04000000
+#define NVA3_3D_VP_ATTR_EN_ALT_6_W 0x08000000
+#define NVA3_3D_VP_ATTR_EN_ALT_5__MASK 0x00f00000
+#define NVA3_3D_VP_ATTR_EN_ALT_5__SHIFT 20
+#define NVA3_3D_VP_ATTR_EN_ALT_5_X 0x00100000
+#define NVA3_3D_VP_ATTR_EN_ALT_5_Y 0x00200000
+#define NVA3_3D_VP_ATTR_EN_ALT_5_Z 0x00400000
+#define NVA3_3D_VP_ATTR_EN_ALT_5_W 0x00800000
+#define NVA3_3D_VP_ATTR_EN_ALT_4__MASK 0x000f0000
+#define NVA3_3D_VP_ATTR_EN_ALT_4__SHIFT 16
+#define NVA3_3D_VP_ATTR_EN_ALT_4_X 0x00010000
+#define NVA3_3D_VP_ATTR_EN_ALT_4_Y 0x00020000
+#define NVA3_3D_VP_ATTR_EN_ALT_4_Z 0x00040000
+#define NVA3_3D_VP_ATTR_EN_ALT_4_W 0x00080000
+#define NVA3_3D_VP_ATTR_EN_ALT_3__MASK 0x0000f000
+#define NVA3_3D_VP_ATTR_EN_ALT_3__SHIFT 12
+#define NVA3_3D_VP_ATTR_EN_ALT_3_X 0x00001000
+#define NVA3_3D_VP_ATTR_EN_ALT_3_Y 0x00002000
+#define NVA3_3D_VP_ATTR_EN_ALT_3_Z 0x00004000
+#define NVA3_3D_VP_ATTR_EN_ALT_3_W 0x00008000
+#define NVA3_3D_VP_ATTR_EN_ALT_2__MASK 0x00000f00
+#define NVA3_3D_VP_ATTR_EN_ALT_2__SHIFT 8
+#define NVA3_3D_VP_ATTR_EN_ALT_2_X 0x00000100
+#define NVA3_3D_VP_ATTR_EN_ALT_2_Y 0x00000200
+#define NVA3_3D_VP_ATTR_EN_ALT_2_Z 0x00000400
+#define NVA3_3D_VP_ATTR_EN_ALT_2_W 0x00000800
+#define NVA3_3D_VP_ATTR_EN_ALT_1__MASK 0x000000f0
+#define NVA3_3D_VP_ATTR_EN_ALT_1__SHIFT 4
+#define NVA3_3D_VP_ATTR_EN_ALT_1_X 0x00000010
+#define NVA3_3D_VP_ATTR_EN_ALT_1_Y 0x00000020
+#define NVA3_3D_VP_ATTR_EN_ALT_1_Z 0x00000040
+#define NVA3_3D_VP_ATTR_EN_ALT_1_W 0x00000080
+#define NVA3_3D_VP_ATTR_EN_ALT_0__MASK 0x0000000f
+#define NVA3_3D_VP_ATTR_EN_ALT_0__SHIFT 0
+#define NVA3_3D_VP_ATTR_EN_ALT_0_X 0x00000001
+#define NVA3_3D_VP_ATTR_EN_ALT_0_Y 0x00000002
+#define NVA3_3D_VP_ATTR_EN_ALT_0_Z 0x00000004
+#define NVA3_3D_VP_ATTR_EN_ALT_0_W 0x00000008
+
+#define NVA3_3D_UNK1140 0x00001140
+
+#define NVA0_3D_UNK1144 0x00001144
+
+#define NVA0_3D_VTX_ATTR_DEFINE 0x0000114c
+#define NVA0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff
+#define NVA0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0
+#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700
+#define NVA0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8
+#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001
+#define NVA0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000
+#define NVA0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000
+#define NVA0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000
+
+#define NVA0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0))
+#define NVA0_3D_VTX_ATTR_DATA__ESIZE 0x00000004
+#define NVA0_3D_VTX_ATTR_DATA__LEN 0x00000004
+
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT(i0) (0x00001160 + 0x4*(i0))
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT__ESIZE 0x00000004
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT__LEN 0x00000020
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BUFFER__MASK 0x0000001f
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BUFFER__SHIFT 0
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_CONST 0x00000040
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_OFFSET__MASK 0x001fff80
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_OFFSET__SHIFT 7
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT__MASK 0x07e00000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT__SHIFT 21
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32_32_32 0x00200000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32_32 0x00400000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16_16_16 0x00600000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32_32 0x00800000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16_16 0x00a00000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8_8_8 0x01400000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16_16 0x01e00000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_32 0x02400000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8_8 0x02600000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8_8 0x03000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_16 0x03600000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_8 0x03a00000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_FORMAT_10_10_10_2 0x06000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE__MASK 0x38000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE__SHIFT 27
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SNORM 0x08000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_UNORM 0x10000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SINT 0x18000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_UINT 0x20000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_USCALED 0x28000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_SSCALED 0x30000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_TYPE_FLOAT 0x38000000
+#define NVA3_3D_VERTEX_ARRAY_ATTRIB_ALT_BGRA 0x80000000
+
+#define NV50_3D_RT_CONTROL 0x0000121c
+#define NV50_3D_RT_CONTROL_COUNT__MASK 0x0000000f
+#define NV50_3D_RT_CONTROL_COUNT__SHIFT 0
+#define NV50_3D_RT_CONTROL_MAP0__MASK 0x00000070
+#define NV50_3D_RT_CONTROL_MAP0__SHIFT 4
+#define NV50_3D_RT_CONTROL_MAP1__MASK 0x00000380
+#define NV50_3D_RT_CONTROL_MAP1__SHIFT 7
+#define NV50_3D_RT_CONTROL_MAP2__MASK 0x00001c00
+#define NV50_3D_RT_CONTROL_MAP2__SHIFT 10
+#define NV50_3D_RT_CONTROL_MAP3__MASK 0x0000e000
+#define NV50_3D_RT_CONTROL_MAP3__SHIFT 13
+#define NV50_3D_RT_CONTROL_MAP4__MASK 0x00070000
+#define NV50_3D_RT_CONTROL_MAP4__SHIFT 16
+#define NV50_3D_RT_CONTROL_MAP5__MASK 0x00380000
+#define NV50_3D_RT_CONTROL_MAP5__SHIFT 19
+#define NV50_3D_RT_CONTROL_MAP6__MASK 0x01c00000
+#define NV50_3D_RT_CONTROL_MAP6__SHIFT 22
+#define NV50_3D_RT_CONTROL_MAP7__MASK 0x0e000000
+#define NV50_3D_RT_CONTROL_MAP7__SHIFT 25
+
+#define NV50_3D_UNK1220 0x00001220
+
+#define NV50_3D_RT_ARRAY_MODE 0x00001224
+#define NV50_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff
+#define NV50_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0
+#define NV50_3D_RT_ARRAY_MODE_MODE__MASK 0x00010000
+#define NV50_3D_RT_ARRAY_MODE_MODE__SHIFT 16
+#define NV50_3D_RT_ARRAY_MODE_MODE_2D_ARRAY 0x00000000
+#define NV50_3D_RT_ARRAY_MODE_MODE_3D 0x00010000
+
+#define NV50_3D_ZETA_HORIZ 0x00001228
+
+#define NV50_3D_ZETA_VERT 0x0000122c
+
+#define NV50_3D_ZETA_ARRAY_MODE 0x00001230
+#define NV50_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff
+#define NV50_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0
+#define NV50_3D_ZETA_ARRAY_MODE_UNK 0x00010000
+
+#define NV50_3D_LINKED_TSC 0x00001234
+
+#define NV50_3D_UNK1238 0x00001238
+
+#define NVA0_3D_DRAW_TFB_BYTES 0x0000123c
+
+#define NV50_3D_RT_HORIZ(i0) (0x00001240 + 0x8*(i0))
+#define NV50_3D_RT_HORIZ__ESIZE 0x00000008
+#define NV50_3D_RT_HORIZ__LEN 0x00000008
+#define NV50_3D_RT_HORIZ_WIDTH__MASK 0x0fffffff
+#define NV50_3D_RT_HORIZ_WIDTH__SHIFT 0
+#define NV50_3D_RT_HORIZ_LINEAR 0x80000000
+
+#define NV50_3D_RT_VERT(i0) (0x00001244 + 0x8*(i0))
+#define NV50_3D_RT_VERT__ESIZE 0x00000008
+#define NV50_3D_RT_VERT__LEN 0x00000008
+
+#define NV50_3D_CB_DEF_ADDRESS_HIGH 0x00001280
+
+#define NV50_3D_CB_DEF_ADDRESS_LOW 0x00001284
+
+#define NV50_3D_CB_DEF_SET 0x00001288
+#define NV50_3D_CB_DEF_SET_SIZE__MASK 0x0000ffff
+#define NV50_3D_CB_DEF_SET_SIZE__SHIFT 0
+#define NV50_3D_CB_DEF_SET_BUFFER__MASK 0x007f0000
+#define NV50_3D_CB_DEF_SET_BUFFER__SHIFT 16
+
+#define NV50_3D_UNK128C 0x0000128c
+#define NV50_3D_UNK128C_0__MASK 0x00000003
+#define NV50_3D_UNK128C_0__SHIFT 0
+#define NV50_3D_UNK128C_1__MASK 0x00000030
+#define NV50_3D_UNK128C_1__SHIFT 4
+#define NV50_3D_UNK128C_2__MASK 0x00000300
+#define NV50_3D_UNK128C_2__SHIFT 8
+#define NV50_3D_UNK128C_3__MASK 0x00003000
+#define NV50_3D_UNK128C_3__SHIFT 12
+
+#define NV50_3D_CALL_LIMIT_LOG 0x00001290
+#define NV50_3D_CALL_LIMIT_LOG_VP__MASK 0x0000000f
+#define NV50_3D_CALL_LIMIT_LOG_VP__SHIFT 0
+#define NV50_3D_CALL_LIMIT_LOG_GP__MASK 0x000000f0
+#define NV50_3D_CALL_LIMIT_LOG_GP__SHIFT 4
+#define NV50_3D_CALL_LIMIT_LOG_FP__MASK 0x00000f00
+#define NV50_3D_CALL_LIMIT_LOG_FP__SHIFT 8
+
+#define NV50_3D_STRMOUT_BUFFERS_CTRL 0x00001294
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED 0x00000001
+#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE__MASK 0x00000002
+#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE__SHIFT 1
+#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_PRIMITIVES 0x00000000
+#define NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET 0x00000002
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__MASK 0x000000f0
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT 4
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MASK 0x000fff00
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT 8
+#define NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX 0x00000800
+
+#define NV50_3D_FP_RESULT_COUNT 0x00001298
+
+#define NV50_3D_VTX_UNK129C 0x0000129c
+
+#define NV50_3D_UNK12A0 0x000012a0
+
+#define NV50_3D_UNK12A8 0x000012a8
+#define NV50_3D_UNK12A8_UNK1 0x00000001
+#define NV50_3D_UNK12A8_UNK2__MASK 0x000ffff0
+#define NV50_3D_UNK12A8_UNK2__SHIFT 4
+
+#define NV50_3D_UNK12AC 0x000012ac
+
+#define NV50_3D_UNK12B0 0x000012b0
+#define NV50_3D_UNK12B0_UNK0__MASK 0x000000ff
+#define NV50_3D_UNK12B0_UNK0__SHIFT 0
+#define NV50_3D_UNK12B0_UNK1__MASK 0x0000ff00
+#define NV50_3D_UNK12B0_UNK1__SHIFT 8
+#define NV50_3D_UNK12B0_UNK2__MASK 0x00ff0000
+#define NV50_3D_UNK12B0_UNK2__SHIFT 16
+#define NV50_3D_UNK12B0_UNK3__MASK 0xff000000
+#define NV50_3D_UNK12B0_UNK3__SHIFT 24
+#define NV50_3D_UNK12B0_UNK3__MAX 0x00000080
+
+#define NV50_3D_UNK12B4 0x000012b4
+
+#define NV50_3D_UNK12B8 0x000012b8
+
+#define NV50_3D_DEPTH_TEST_ENABLE 0x000012cc
+
+#define NV50_3D_D3D_FILL_MODE 0x000012d0
+#define NV50_3D_D3D_FILL_MODE_POINT 0x00000001
+#define NV50_3D_D3D_FILL_MODE_WIREFRAME 0x00000002
+#define NV50_3D_D3D_FILL_MODE_SOLID 0x00000003
+
+#define NV50_3D_SHADE_MODEL 0x000012d4
+#define NV50_3D_SHADE_MODEL_FLAT 0x00001d00
+#define NV50_3D_SHADE_MODEL_SMOOTH 0x00001d01
+
+#define NV50_3D_LOCAL_ADDRESS_HIGH 0x000012d8
+
+#define NV50_3D_LOCAL_ADDRESS_LOW 0x000012dc
+
+#define NV50_3D_LOCAL_SIZE_LOG 0x000012e0
+
+#define NV50_3D_BLEND_INDEPENDENT 0x000012e4
+
+#define NV50_3D_DEPTH_WRITE_ENABLE 0x000012e8
+
+#define NV50_3D_ALPHA_TEST_ENABLE 0x000012ec
+
+#define NV50_3D_PM_SET(i0) (0x000012f0 + 0x4*(i0))
+#define NV50_3D_PM_SET__ESIZE 0x00000004
+#define NV50_3D_PM_SET__LEN 0x00000004
+
+#define NV50_3D_VB_ELEMENT_U8_SETUP 0x00001300
+#define NV50_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000
+#define NV50_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30
+#define NV50_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff
+#define NV50_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0
+
+#define NV50_3D_VB_ELEMENT_U8 0x00001304
+#define NV50_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff
+#define NV50_3D_VB_ELEMENT_U8_I0__SHIFT 0
+#define NV50_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00
+#define NV50_3D_VB_ELEMENT_U8_I1__SHIFT 8
+#define NV50_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000
+#define NV50_3D_VB_ELEMENT_U8_I2__SHIFT 16
+#define NV50_3D_VB_ELEMENT_U8_I3__MASK 0xff000000
+#define NV50_3D_VB_ELEMENT_U8_I3__SHIFT 24
+
+#define NV50_3D_D3D_CULL_MODE 0x00001308
+#define NV50_3D_D3D_CULL_MODE_NONE 0x00000001
+#define NV50_3D_D3D_CULL_MODE_FRONT 0x00000002
+#define NV50_3D_D3D_CULL_MODE_BACK 0x00000003
+
+#define NV50_3D_DEPTH_TEST_FUNC 0x0000130c
+#define NV50_3D_DEPTH_TEST_FUNC_NEVER 0x00000200
+#define NV50_3D_DEPTH_TEST_FUNC_LESS 0x00000201
+#define NV50_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202
+#define NV50_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203
+#define NV50_3D_DEPTH_TEST_FUNC_GREATER 0x00000204
+#define NV50_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205
+#define NV50_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206
+#define NV50_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207
+
+#define NV50_3D_ALPHA_TEST_REF 0x00001310
+
+#define NV50_3D_ALPHA_TEST_FUNC 0x00001314
+#define NV50_3D_ALPHA_TEST_FUNC_NEVER 0x00000200
+#define NV50_3D_ALPHA_TEST_FUNC_LESS 0x00000201
+#define NV50_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202
+#define NV50_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203
+#define NV50_3D_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NV50_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
+#define NV50_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206
+#define NV50_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207
+
+#define NVA0_3D_DRAW_TFB_STRIDE 0x00001318
+#define NVA0_3D_DRAW_TFB_STRIDE__MIN 0x00000001
+#define NVA0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff
+
+#define NV50_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0))
+#define NV50_3D_BLEND_COLOR__ESIZE 0x00000004
+#define NV50_3D_BLEND_COLOR__LEN 0x00000004
+
+#define NV50_3D_UNK132C 0x0000132c
+
+#define NV50_3D_TSC_FLUSH 0x00001330
+#define NV50_3D_TSC_FLUSH_SPECIFIC 0x00000001
+#define NV50_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NV50_3D_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NV50_3D_TIC_FLUSH 0x00001334
+#define NV50_3D_TIC_FLUSH_SPECIFIC 0x00000001
+#define NV50_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NV50_3D_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NV50_3D_TEX_CACHE_CTL 0x00001338
+#define NV50_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030
+#define NV50_3D_TEX_CACHE_CTL_UNK1__SHIFT 4
+
+#define NV50_3D_BLEND_SEPARATE_ALPHA 0x0000133c
+
+#define NV50_3D_BLEND_EQUATION_RGB 0x00001340
+#define NV50_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NV50_3D_BLEND_EQUATION_RGB_MIN 0x00008007
+#define NV50_3D_BLEND_EQUATION_RGB_MAX 0x00008008
+#define NV50_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NV50_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NV50_3D_BLEND_FUNC_SRC_RGB 0x00001344
+
+#define NV50_3D_BLEND_FUNC_DST_RGB 0x00001348
+
+#define NV50_3D_BLEND_EQUATION_ALPHA 0x0000134c
+#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NV50_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NV50_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NV50_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NV50_3D_BLEND_FUNC_SRC_ALPHA 0x00001350
+
+#define NV50_3D_UNK1354 0x00001354
+
+#define NV50_3D_BLEND_FUNC_DST_ALPHA 0x00001358
+
+#define NV50_3D_BLEND_ENABLE_COMMON 0x0000135c
+
+#define NV50_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0))
+#define NV50_3D_BLEND_ENABLE__ESIZE 0x00000004
+#define NV50_3D_BLEND_ENABLE__LEN 0x00000008
+
+#define NV50_3D_STENCIL_ENABLE 0x00001380
+
+#define NV50_3D_STENCIL_FRONT_OP_FAIL 0x00001384
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
+#define NV50_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
+
+#define NV50_3D_STENCIL_FRONT_FUNC_REF 0x00001394
+
+#define NV50_3D_STENCIL_FRONT_MASK 0x00001398
+
+#define NV50_3D_STENCIL_FRONT_FUNC_MASK 0x0000139c
+
+#define NV50_3D_UNK13A0 0x000013a0
+
+#define NVA0_3D_DRAW_TFB_BASE 0x000013a4
+
+#define NV50_3D_FRAG_COLOR_CLAMP_EN 0x000013a8
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000
+#define NV50_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000
+
+#define NV50_3D_SCREEN_Y_CONTROL 0x000013ac
+#define NV50_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001
+#define NV50_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010
+
+#define NV50_3D_LINE_WIDTH 0x000013b0
+
+#define NV50_3D_TEX_LIMITS(i0) (0x000013b4 + 0x4*(i0))
+#define NV50_3D_TEX_LIMITS__ESIZE 0x00000004
+#define NV50_3D_TEX_LIMITS__LEN 0x00000003
+#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f
+#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0
+#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000
+#define NV50_3D_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004
+#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0
+#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4
+#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000
+#define NV50_3D_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007
+
+#define NV50_3D_POINT_COORD_REPLACE_MAP(i0) (0x000013c0 + 0x4*(i0))
+#define NV50_3D_POINT_COORD_REPLACE_MAP__ESIZE 0x00000004
+#define NV50_3D_POINT_COORD_REPLACE_MAP__LEN 0x00000010
+
+#define NV50_3D_UNK1400_LANES 0x00001400
+
+#define NV50_3D_UNK1404 0x00001404
+
+#define NV50_3D_UNK1408 0x00001408
+
+#define NV50_3D_VP_START_ID 0x0000140c
+
+#define NV50_3D_GP_START_ID 0x00001410
+
+#define NV50_3D_FP_START_ID 0x00001414
+
+#define NVA3_3D_UNK1418 0x00001418
+
+#define NV50_3D_UNK141C 0x0000141c
+
+#define NV50_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420
+#define NV50_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001
+#define NV50_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400
+
+#define NV50_3D_VERTEX_ARRAY_FLUSH 0x0000142c
+
+#define NV50_3D_UNK1430 0x00001430
+#define NV50_3D_UNK1430_UNK0 0x00000010
+#define NV50_3D_UNK1430_UNK1 0x00000100
+
+#define NV50_3D_VB_ELEMENT_BASE 0x00001434
+
+#define NV50_3D_VB_INSTANCE_BASE 0x00001438
+
+#define NV50_3D_CLEAR_FLAGS 0x0000143c
+#define NV50_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001
+#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT__MASK 0x00000010
+#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT__SHIFT 4
+#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT_SCISSOR 0x00000000
+#define NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT 0x00000010
+
+#define NV50_3D_CODE_CB_FLUSH 0x00001440
+
+#define NV50_3D_BIND_TSC(i0) (0x00001444 + 0x8*(i0))
+#define NV50_3D_BIND_TSC__ESIZE 0x00000008
+#define NV50_3D_BIND_TSC__LEN 0x00000003
+#define NV50_3D_BIND_TSC_VALID 0x00000001
+#define NV50_3D_BIND_TSC_SAMPLER__MASK 0x000000f0
+#define NV50_3D_BIND_TSC_SAMPLER__SHIFT 4
+#define NV50_3D_BIND_TSC_TSC__MASK 0x001ff000
+#define NV50_3D_BIND_TSC_TSC__SHIFT 12
+
+#define NV50_3D_BIND_TIC(i0) (0x00001448 + 0x8*(i0))
+#define NV50_3D_BIND_TIC__ESIZE 0x00000008
+#define NV50_3D_BIND_TIC__LEN 0x00000003
+#define NV50_3D_BIND_TIC_VALID 0x00000001
+#define NV50_3D_BIND_TIC_TEXTURE__MASK 0x000001fe
+#define NV50_3D_BIND_TIC_TEXTURE__SHIFT 1
+#define NV50_3D_BIND_TIC_TIC__MASK 0x7ffffe00
+#define NV50_3D_BIND_TIC_TIC__SHIFT 9
+
+#define NV50_3D_BIND_TSC2(i0) (0x00001468 + 0x8*(i0))
+#define NV50_3D_BIND_TSC2__ESIZE 0x00000008
+#define NV50_3D_BIND_TSC2__LEN 0x00000003
+#define NV50_3D_BIND_TSC2_VALID 0x00000001
+#define NV50_3D_BIND_TSC2_SAMPLER__MASK 0x00000010
+#define NV50_3D_BIND_TSC2_SAMPLER__SHIFT 4
+#define NV50_3D_BIND_TSC2_TSC__MASK 0x001ff000
+#define NV50_3D_BIND_TSC2_TSC__SHIFT 12
+
+#define NV50_3D_BIND_TIC2(i0) (0x0000146c + 0x8*(i0))
+#define NV50_3D_BIND_TIC2__ESIZE 0x00000008
+#define NV50_3D_BIND_TIC2__LEN 0x00000003
+#define NV50_3D_BIND_TIC2_VALID 0x00000001
+#define NV50_3D_BIND_TIC2_TEXTURE__MASK 0x00000002
+#define NV50_3D_BIND_TIC2_TEXTURE__SHIFT 1
+#define NV50_3D_BIND_TIC2_TIC__MASK 0x7ffffe00
+#define NV50_3D_BIND_TIC2_TIC__SHIFT 9
+
+#define NV50_3D_STRMOUT_MAP(i0) (0x00001480 + 0x4*(i0))
+#define NV50_3D_STRMOUT_MAP__ESIZE 0x00000004
+#define NV50_3D_STRMOUT_MAP__LEN 0x00000020
+
+#define NV50_3D_CLIPID_HEIGHT 0x00001504
+#define NV50_3D_CLIPID_HEIGHT__MAX 0x00002000
+
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ 0x00001508
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000
+#define NV50_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16
+
+#define NV50_3D_CLIPID_FILL_RECT_VERT 0x0000150c
+#define NV50_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff
+#define NV50_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0
+#define NV50_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000
+#define NV50_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16
+
+#define NV50_3D_CLIP_DISTANCE_ENABLE 0x00001510
+#define NV50_3D_CLIP_DISTANCE_ENABLE_0 0x00000001
+#define NV50_3D_CLIP_DISTANCE_ENABLE_1 0x00000002
+#define NV50_3D_CLIP_DISTANCE_ENABLE_2 0x00000004
+#define NV50_3D_CLIP_DISTANCE_ENABLE_3 0x00000008
+#define NV50_3D_CLIP_DISTANCE_ENABLE_4 0x00000010
+#define NV50_3D_CLIP_DISTANCE_ENABLE_5 0x00000020
+#define NV50_3D_CLIP_DISTANCE_ENABLE_6 0x00000040
+#define NV50_3D_CLIP_DISTANCE_ENABLE_7 0x00000080
+
+#define NV50_3D_SAMPLECNT_ENABLE 0x00001514
+
+#define NV50_3D_POINT_SIZE 0x00001518
+
+#define NV50_3D_ZCULL_STATCTRS_ENABLE 0x0000151c
+
+#define NV50_3D_POINT_SPRITE_ENABLE 0x00001520
+
+#define NVA0_3D_UNK152C 0x0000152c
+#define NVA0_3D_UNK152C_UNK0 0x00000001
+#define NVA0_3D_UNK152C_UNK1 0x00000010
+#define NVA0_3D_UNK152C_UNK2 0x00000100
+#define NVA0_3D_UNK152C_UNK3__MASK 0x000ff000
+#define NVA0_3D_UNK152C_UNK3__SHIFT 12
+#define NVA0_3D_UNK152C_UNK3__MAX 0x00000028
+
+#define NV50_3D_COUNTER_RESET 0x00001530
+#define NV50_3D_COUNTER_RESET_SAMPLECNT 0x00000001
+#define NV50_3D_COUNTER_RESET_ZCULL_STATS 0x00000002
+#define NVA0_3D_COUNTER_RESET_STRMOUT_VERTICES 0x00000008
+#define NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK 0x00000010
+#define NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x00000011
+#define NV50_3D_COUNTER_RESET_VFETCH_VERTICES 0x00000012
+#define NV50_3D_COUNTER_RESET_VFETCH_PRIMITIVES 0x00000013
+#define NV50_3D_COUNTER_RESET_VP_LAUNCHES 0x00000015
+#define NV50_3D_COUNTER_RESET_GP_LAUNCHES 0x0000001a
+#define NV50_3D_COUNTER_RESET_GP_PRIMITIVES_OUT 0x0000001b
+#define NV50_3D_COUNTER_RESET_RAST_PRIMITIVES_PRECLIP 0x0000001c
+#define NV50_3D_COUNTER_RESET_RAST_PRIMITIVES_POSTCLIP 0x0000001d
+#define NV50_3D_COUNTER_RESET_FP_PIXELS 0x0000001e
+
+#define NV50_3D_MULTISAMPLE_ENABLE 0x00001534
+
+#define NV50_3D_ZETA_ENABLE 0x00001538
+
+#define NV50_3D_MULTISAMPLE_CTRL 0x0000153c
+#define NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001
+#define NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010
+
+#define NV50_3D_NOPERSPECTIVE_BITMAP(i0) (0x00001540 + 0x4*(i0))
+#define NV50_3D_NOPERSPECTIVE_BITMAP__ESIZE 0x00000004
+#define NV50_3D_NOPERSPECTIVE_BITMAP__LEN 0x00000004
+
+#define NV50_3D_COND_ADDRESS_HIGH 0x00001550
+
+#define NV50_3D_COND_ADDRESS_LOW 0x00001554
+
+#define NV50_3D_COND_MODE 0x00001558
+#define NV50_3D_COND_MODE_NEVER 0x00000000
+#define NV50_3D_COND_MODE_ALWAYS 0x00000001
+#define NV50_3D_COND_MODE_RES_NON_ZERO 0x00000002
+#define NV50_3D_COND_MODE_EQUAL 0x00000003
+#define NV50_3D_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NV50_3D_TSC_ADDRESS_HIGH 0x0000155c
+
+#define NV50_3D_TSC_ADDRESS_LOW 0x00001560
+#define NV50_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020
+
+#define NV50_3D_TSC_LIMIT 0x00001564
+#define NV50_3D_TSC_LIMIT__MAX 0x00001fff
+
+#define NV50_3D_UNK1568 0x00001568
+
+#define NV50_3D_POLYGON_OFFSET_FACTOR 0x0000156c
+
+#define NV50_3D_LINE_SMOOTH_ENABLE 0x00001570
+
+#define NV50_3D_TIC_ADDRESS_HIGH 0x00001574
+
+#define NV50_3D_TIC_ADDRESS_LOW 0x00001578
+
+#define NV50_3D_TIC_LIMIT 0x0000157c
+
+#define NV50_3D_PM_CONTROL(i0) (0x00001580 + 0x4*(i0))
+#define NV50_3D_PM_CONTROL__ESIZE 0x00000004
+#define NV50_3D_PM_CONTROL__LEN 0x00000004
+#define NV50_3D_PM_CONTROL_UNK0 0x00000001
+#define NV50_3D_PM_CONTROL_UNK1__MASK 0x00000070
+#define NV50_3D_PM_CONTROL_UNK1__SHIFT 4
+#define NV50_3D_PM_CONTROL_UNK2__MASK 0x00ffff00
+#define NV50_3D_PM_CONTROL_UNK2__SHIFT 8
+#define NV50_3D_PM_CONTROL_UNK3__MASK 0xff000000
+#define NV50_3D_PM_CONTROL_UNK3__SHIFT 24
+
+#define NV50_3D_ZCULL_REGION 0x00001590
+
+#define NV50_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594
+
+#define NV50_3D_STENCIL_BACK_OP_FAIL 0x00001598
+#define NV50_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
+#define NV50_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
+#define NV50_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
+#define NV50_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
+#define NV50_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
+#define NV50_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_BACK_OP_ZPASS 0x000015a0
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
+#define NV50_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
+
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
+#define NV50_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
+
+#define NV50_3D_UNK15A8 0x000015a8
+#define NV50_3D_UNK15A8_UNK1__MASK 0x00000007
+#define NV50_3D_UNK15A8_UNK1__SHIFT 0
+#define NV50_3D_UNK15A8_UNK2__MASK 0x00000070
+#define NV50_3D_UNK15A8_UNK2__SHIFT 4
+
+#define NV50_3D_UNK15AC 0x000015ac
+
+#define NV50_3D_UNK15B0 0x000015b0
+#define NV50_3D_UNK15B0_0 0x00000001
+#define NV50_3D_UNK15B0_1 0x00000010
+#define NV50_3D_UNK15B0_2 0x00000100
+
+#define NV50_3D_CSAA_ENABLE 0x000015b4
+
+#define NV50_3D_FRAMEBUFFER_SRGB 0x000015b8
+
+#define NV50_3D_POLYGON_OFFSET_UNITS 0x000015bc
+
+#define NVA3_3D_UNK15C4 0x000015c4
+
+#define NVA3_3D_UNK15C8 0x000015c8
+
+#define NV50_3D_LAYER 0x000015cc
+#define NV50_3D_LAYER_IDX__MASK 0x0000ffff
+#define NV50_3D_LAYER_IDX__SHIFT 0
+#define NV50_3D_LAYER_USE_GP 0x00010000
+
+#define NV50_3D_MULTISAMPLE_MODE 0x000015d0
+#define NV50_3D_MULTISAMPLE_MODE_MS1 0x00000000
+#define NV50_3D_MULTISAMPLE_MODE_MS2 0x00000001
+#define NV50_3D_MULTISAMPLE_MODE_MS4 0x00000002
+#define NV50_3D_MULTISAMPLE_MODE_MS8 0x00000003
+#define NV50_3D_MULTISAMPLE_MODE_MS8_ALT 0x00000004
+#define NV50_3D_MULTISAMPLE_MODE_MS2_ALT 0x00000005
+#define NV50_3D_MULTISAMPLE_MODE_UNK6 0x00000006
+#define NV50_3D_MULTISAMPLE_MODE_MS4_CS4 0x00000008
+#define NV50_3D_MULTISAMPLE_MODE_MS4_CS12 0x00000009
+#define NV50_3D_MULTISAMPLE_MODE_MS8_CS8 0x0000000a
+#define NV50_3D_MULTISAMPLE_MODE_MS8_CS24 0x0000000b
+
+#define NV50_3D_VERTEX_BEGIN_D3D 0x000015d4
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NV50_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NV50_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000
+#define NV84_3D_VERTEX_BEGIN_D3D_PRIMITIVE_ID_CONT 0x20000000
+#define NVA0_3D_VERTEX_BEGIN_D3D_INSTANCE_CONT 0x40000000
+
+#define NV50_3D_VERTEX_END_D3D 0x000015d8
+#define NV50_3D_VERTEX_END_D3D_UNK0 0x00000001
+#define NVA0_3D_VERTEX_END_D3D_UNK1 0x00000002
+
+#define NV50_3D_VERTEX_BEGIN_GL 0x000015dc
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x10000000
+#define NV84_3D_VERTEX_BEGIN_GL_PRIMITIVE_ID_CONT 0x20000000
+#define NVA0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x40000000
+
+#define NV50_3D_VERTEX_END_GL 0x000015e0
+#define NV50_3D_VERTEX_END_GL_UNK0 0x00000001
+#define NVA0_3D_VERTEX_END_GL_UNK1 0x00000002
+
+#define NV50_3D_EDGEFLAG 0x000015e4
+
+#define NV50_3D_VB_ELEMENT_U32 0x000015e8
+
+#define NV50_3D_VB_ELEMENT_U16_SETUP 0x000015ec
+#define NV50_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000
+#define NV50_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30
+#define NV50_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff
+#define NV50_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0
+
+#define NV50_3D_VB_ELEMENT_U16 0x000015f0
+#define NV50_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff
+#define NV50_3D_VB_ELEMENT_U16_I0__SHIFT 0
+#define NV50_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000
+#define NV50_3D_VB_ELEMENT_U16_I1__SHIFT 16
+
+#define NV50_3D_VERTEX_BASE_HIGH 0x000015f4
+
+#define NV50_3D_VERTEX_BASE_LOW 0x000015f8
+
+#define NV50_3D_VERTEX_DATA 0x00001640
+
+#define NV50_3D_PRIM_RESTART_ENABLE 0x00001644
+
+#define NV50_3D_PRIM_RESTART_INDEX 0x00001648
+
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100
+#define NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000
+
+#define NV50_3D_VP_ATTR_EN(i0) (0x00001650 + 0x4*(i0))
+#define NV50_3D_VP_ATTR_EN__ESIZE 0x00000004
+#define NV50_3D_VP_ATTR_EN__LEN 0x00000002
+#define NV50_3D_VP_ATTR_EN_7__MASK 0xf0000000
+#define NV50_3D_VP_ATTR_EN_7__SHIFT 28
+#define NV50_3D_VP_ATTR_EN_7_X 0x10000000
+#define NV50_3D_VP_ATTR_EN_7_Y 0x20000000
+#define NV50_3D_VP_ATTR_EN_7_Z 0x40000000
+#define NV50_3D_VP_ATTR_EN_7_W 0x80000000
+#define NV50_3D_VP_ATTR_EN_6__MASK 0x0f000000
+#define NV50_3D_VP_ATTR_EN_6__SHIFT 24
+#define NV50_3D_VP_ATTR_EN_6_X 0x01000000
+#define NV50_3D_VP_ATTR_EN_6_Y 0x02000000
+#define NV50_3D_VP_ATTR_EN_6_Z 0x04000000
+#define NV50_3D_VP_ATTR_EN_6_W 0x08000000
+#define NV50_3D_VP_ATTR_EN_5__MASK 0x00f00000
+#define NV50_3D_VP_ATTR_EN_5__SHIFT 20
+#define NV50_3D_VP_ATTR_EN_5_X 0x00100000
+#define NV50_3D_VP_ATTR_EN_5_Y 0x00200000
+#define NV50_3D_VP_ATTR_EN_5_Z 0x00400000
+#define NV50_3D_VP_ATTR_EN_5_W 0x00800000
+#define NV50_3D_VP_ATTR_EN_4__MASK 0x000f0000
+#define NV50_3D_VP_ATTR_EN_4__SHIFT 16
+#define NV50_3D_VP_ATTR_EN_4_X 0x00010000
+#define NV50_3D_VP_ATTR_EN_4_Y 0x00020000
+#define NV50_3D_VP_ATTR_EN_4_Z 0x00040000
+#define NV50_3D_VP_ATTR_EN_4_W 0x00080000
+#define NV50_3D_VP_ATTR_EN_3__MASK 0x0000f000
+#define NV50_3D_VP_ATTR_EN_3__SHIFT 12
+#define NV50_3D_VP_ATTR_EN_3_X 0x00001000
+#define NV50_3D_VP_ATTR_EN_3_Y 0x00002000
+#define NV50_3D_VP_ATTR_EN_3_Z 0x00004000
+#define NV50_3D_VP_ATTR_EN_3_W 0x00008000
+#define NV50_3D_VP_ATTR_EN_2__MASK 0x00000f00
+#define NV50_3D_VP_ATTR_EN_2__SHIFT 8
+#define NV50_3D_VP_ATTR_EN_2_X 0x00000100
+#define NV50_3D_VP_ATTR_EN_2_Y 0x00000200
+#define NV50_3D_VP_ATTR_EN_2_Z 0x00000400
+#define NV50_3D_VP_ATTR_EN_2_W 0x00000800
+#define NV50_3D_VP_ATTR_EN_1__MASK 0x000000f0
+#define NV50_3D_VP_ATTR_EN_1__SHIFT 4
+#define NV50_3D_VP_ATTR_EN_1_X 0x00000010
+#define NV50_3D_VP_ATTR_EN_1_Y 0x00000020
+#define NV50_3D_VP_ATTR_EN_1_Z 0x00000040
+#define NV50_3D_VP_ATTR_EN_1_W 0x00000080
+#define NV50_3D_VP_ATTR_EN_0__MASK 0x0000000f
+#define NV50_3D_VP_ATTR_EN_0__SHIFT 0
+#define NV50_3D_VP_ATTR_EN_0_X 0x00000001
+#define NV50_3D_VP_ATTR_EN_0_Y 0x00000002
+#define NV50_3D_VP_ATTR_EN_0_Z 0x00000004
+#define NV50_3D_VP_ATTR_EN_0_W 0x00000008
+
+#define NV50_3D_POINT_SMOOTH_ENABLE 0x00001658
+
+#define NV50_3D_POINT_RASTER_RULES 0x0000165c
+#define NV50_3D_POINT_RASTER_RULES_OGL 0x00000000
+#define NV50_3D_POINT_RASTER_RULES_D3D 0x00000001
+
+#define NV50_3D_POINT_SPRITE_CTRL 0x00001660
+#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN__MASK 0x00000010
+#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN__SHIFT 4
+#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN_LOWER_LEFT 0x00000000
+#define NV50_3D_POINT_SPRITE_CTRL_COORD_ORIGIN_UPPER_LEFT 0x00000010
+
+#define NVA0_3D_TEX_MISC 0x00001664
+#define NVA0_3D_TEX_MISC_UNK1 0x00000002
+#define NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004
+
+#define NV50_3D_LINE_SMOOTH_BLUR 0x00001668
+#define NV50_3D_LINE_SMOOTH_BLUR_LOW 0x00000000
+#define NV50_3D_LINE_SMOOTH_BLUR_MEDIUM 0x00000001
+#define NV50_3D_LINE_SMOOTH_BLUR_HIGH 0x00000002
+
+#define NV50_3D_LINE_STIPPLE_ENABLE 0x0000166c
+
+#define NV50_3D_COVERAGE_LUT(i0) (0x00001670 + 0x4*(i0))
+#define NV50_3D_COVERAGE_LUT__ESIZE 0x00000004
+#define NV50_3D_COVERAGE_LUT__LEN 0x00000004
+#define NV50_3D_COVERAGE_LUT_0__MASK 0x000000ff
+#define NV50_3D_COVERAGE_LUT_0__SHIFT 0
+#define NV50_3D_COVERAGE_LUT_1__MASK 0x0000ff00
+#define NV50_3D_COVERAGE_LUT_1__SHIFT 8
+#define NV50_3D_COVERAGE_LUT_2__MASK 0x00ff0000
+#define NV50_3D_COVERAGE_LUT_2__SHIFT 16
+#define NV50_3D_COVERAGE_LUT_3__MASK 0xff000000
+#define NV50_3D_COVERAGE_LUT_3__SHIFT 24
+
+#define NV50_3D_LINE_STIPPLE 0x00001680
+#define NV50_3D_LINE_STIPPLE_FACTOR_M1__MASK 0x000000ff
+#define NV50_3D_LINE_STIPPLE_FACTOR_M1__SHIFT 0
+#define NV50_3D_LINE_STIPPLE_PATTERN__MASK 0x00ffff00
+#define NV50_3D_LINE_STIPPLE_PATTERN__SHIFT 8
+
+#define NV50_3D_PROVOKING_VERTEX_LAST 0x00001684
+
+#define NV50_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688
+
+#define NV50_3D_POLYGON_STIPPLE_ENABLE 0x0000168c
+
+#define NV50_3D_UNK1690 0x00001690
+#define NV50_3D_UNK1690_ALWAYS_DERIV 0x00000001
+#define NV50_3D_UNK1690_UNK16 0x00010000
+
+#define NV50_3D_SET_PROGRAM_CB 0x00001694
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM__MASK 0x000000f0
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM__SHIFT 4
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX 0x00000000
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY 0x00000020
+#define NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT 0x00000030
+#define NV50_3D_SET_PROGRAM_CB_INDEX__MASK 0x00000f00
+#define NV50_3D_SET_PROGRAM_CB_INDEX__SHIFT 8
+#define NV50_3D_SET_PROGRAM_CB_BUFFER__MASK 0x0007f000
+#define NV50_3D_SET_PROGRAM_CB_BUFFER__SHIFT 12
+#define NV50_3D_SET_PROGRAM_CB_VALID 0x00000001
+
+#define NV50_3D_UNK1698 0x00001698
+#define NV50_3D_UNK1698_0 0x00000001
+#define NV50_3D_UNK1698_1 0x00000010
+#define NV50_3D_UNK1698_2 0x00000100
+
+#define NVA3_3D_SAMPLE_SHADING 0x0000169c
+#define NVA3_3D_SAMPLE_SHADING_MIN_SAMPLES__MASK 0x0000000f
+#define NVA3_3D_SAMPLE_SHADING_MIN_SAMPLES__SHIFT 0
+#define NVA3_3D_SAMPLE_SHADING_ENABLE 0x00000010
+
+#define NVA3_3D_UNK16A0 0x000016a0
+
+#define NV50_3D_VP_RESULT_MAP_SIZE 0x000016ac
+
+#define NV50_3D_VP_REG_ALLOC_TEMP 0x000016b0
+
+#define NVA0_3D_UNK16B4 0x000016b4
+#define NVA0_3D_UNK16B4_UNK0 0x00000001
+#define NVA3_3D_UNK16B4_UNK1 0x00000002
+
+#define NV50_3D_VP_REG_ALLOC_RESULT 0x000016b8
+
+#define NV50_3D_VP_RESULT_MAP(i0) (0x000016bc + 0x4*(i0))
+#define NV50_3D_VP_RESULT_MAP__ESIZE 0x00000004
+#define NV50_3D_VP_RESULT_MAP__LEN 0x00000011
+#define NV50_3D_VP_RESULT_MAP_0__MASK 0x000000ff
+#define NV50_3D_VP_RESULT_MAP_0__SHIFT 0
+#define NV50_3D_VP_RESULT_MAP_1__MASK 0x0000ff00
+#define NV50_3D_VP_RESULT_MAP_1__SHIFT 8
+#define NV50_3D_VP_RESULT_MAP_2__MASK 0x00ff0000
+#define NV50_3D_VP_RESULT_MAP_2__SHIFT 16
+#define NV50_3D_VP_RESULT_MAP_3__MASK 0xff000000
+#define NV50_3D_VP_RESULT_MAP_3__SHIFT 24
+
+#define NV50_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0))
+#define NV50_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004
+#define NV50_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020
+
+#define NVA0_3D_STRMOUT_OFFSET(i0) (0x00001780 + 0x4*(i0))
+#define NVA0_3D_STRMOUT_OFFSET__ESIZE 0x00000004
+#define NVA0_3D_STRMOUT_OFFSET__LEN 0x00000004
+
+#define NV50_3D_GP_ENABLE 0x00001798
+
+#define NV50_3D_GP_REG_ALLOC_TEMP 0x000017a0
+
+#define NV50_3D_GP_REG_ALLOC_RESULT 0x000017a8
+
+#define NV50_3D_GP_RESULT_MAP_SIZE 0x000017ac
+
+#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE 0x000017b0
+#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS 0x00000001
+#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP 0x00000002
+#define NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP 0x00000003
+
+#define NV50_3D_RASTERIZE_ENABLE 0x000017b4
+
+#define NV50_3D_STRMOUT_ENABLE 0x000017b8
+
+#define NV50_3D_GP_RESULT_MAP(i0) (0x000017fc + 0x4*(i0))
+#define NV50_3D_GP_RESULT_MAP__ESIZE 0x00000004
+#define NV50_3D_GP_RESULT_MAP__LEN 0x00000021
+#define NV50_3D_GP_RESULT_MAP_0__MASK 0x000000ff
+#define NV50_3D_GP_RESULT_MAP_0__SHIFT 0
+#define NV50_3D_GP_RESULT_MAP_1__MASK 0x0000ff00
+#define NV50_3D_GP_RESULT_MAP_1__SHIFT 8
+#define NV50_3D_GP_RESULT_MAP_2__MASK 0x00ff0000
+#define NV50_3D_GP_RESULT_MAP_2__SHIFT 16
+#define NV50_3D_GP_RESULT_MAP_3__MASK 0xff000000
+#define NV50_3D_GP_RESULT_MAP_3__SHIFT 24
+
+#define NV50_3D_POLYGON_OFFSET_CLAMP 0x0000187c
+
+#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT(i0) (0x00001880 + 0x4*(i0))
+#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT__ESIZE 0x00000004
+#define NVA3_3D_VERTEX_ARRAY_PER_INSTANCE_ALT__LEN 0x00000020
+
+#define NV50_3D_GP_VIEWPORT_ID_ENABLE 0x00001900
+
+#define NV50_3D_SEMANTIC_COLOR 0x00001904
+#define NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK 0x000000ff
+#define NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT 0
+#define NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK 0x0000ff00
+#define NV50_3D_SEMANTIC_COLOR_BFC0_ID__SHIFT 8
+#define NV50_3D_SEMANTIC_COLOR_COLR_NR__MASK 0x00ff0000
+#define NV50_3D_SEMANTIC_COLOR_COLR_NR__SHIFT 16
+#define NV50_3D_SEMANTIC_COLOR_CLMP_EN 0x01000000
+
+#define NV50_3D_SEMANTIC_CLIP 0x00001908
+#define NV50_3D_SEMANTIC_CLIP_CLIP_START__MASK 0x000000ff
+#define NV50_3D_SEMANTIC_CLIP_CLIP_START__SHIFT 0
+#define NV50_3D_SEMANTIC_CLIP_CLIP_NUM__MASK 0x00000f00
+#define NV50_3D_SEMANTIC_CLIP_CLIP_NUM__SHIFT 8
+
+#define NV50_3D_SEMANTIC_LAYER 0x0000190c
+#define NV50_3D_SEMANTIC_LAYER_LAYER_ID__MASK 0x000000ff
+#define NV50_3D_SEMANTIC_LAYER_LAYER_ID__SHIFT 0
+
+#define NV50_3D_SEMANTIC_PTSZ 0x00001910
+#define NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK 0x00000001
+#define NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__SHIFT 0
+#define NV50_3D_SEMANTIC_PTSZ_PTSZ_ID__MASK 0x00000ff0
+#define NV50_3D_SEMANTIC_PTSZ_PTSZ_ID__SHIFT 4
+
+#define NV50_3D_SEMANTIC_PRIM_ID 0x00001914
+#define NV50_3D_SEMANTIC_PRIM_ID_PRIM_ID__MASK 0x000000ff
+#define NV50_3D_SEMANTIC_PRIM_ID_PRIM_ID__SHIFT 0
+
+#define NV50_3D_CULL_FACE_ENABLE 0x00001918
+
+#define NV50_3D_FRONT_FACE 0x0000191c
+#define NV50_3D_FRONT_FACE_CW 0x00000900
+#define NV50_3D_FRONT_FACE_CCW 0x00000901
+
+#define NV50_3D_CULL_FACE 0x00001920
+#define NV50_3D_CULL_FACE_FRONT 0x00000404
+#define NV50_3D_CULL_FACE_BACK 0x00000405
+#define NV50_3D_CULL_FACE_FRONT_AND_BACK 0x00000408
+
+#define NV50_3D_LINE_LAST_PIXEL 0x00001924
+
+#define NVA3_3D_FP_MULTISAMPLE 0x00001928
+#define NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK 0x00000001
+#define NVA3_3D_FP_MULTISAMPLE_FORCE_PER_SAMPLE 0x00000002
+
+#define NV50_3D_VIEWPORT_TRANSFORM_EN 0x0000192c
+
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK0 0x00000001
+#define NVA0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1 0x00000002
+#define NVA0_3D_VIEW_VOLUME_CLIP_CTRL_UNK2 0x00000004
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000
+#define NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000
+#define NV84_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000
+
+#define NV50_3D_CLIP_DISTANCE_MODE 0x00001940
+#define NV50_3D_CLIP_DISTANCE_MODE_0__MASK 0x00000001
+#define NV50_3D_CLIP_DISTANCE_MODE_0__SHIFT 0
+#define NV50_3D_CLIP_DISTANCE_MODE_0_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_0_CULL 0x00000001
+#define NV50_3D_CLIP_DISTANCE_MODE_1__MASK 0x00000010
+#define NV50_3D_CLIP_DISTANCE_MODE_1__SHIFT 4
+#define NV50_3D_CLIP_DISTANCE_MODE_1_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_1_CULL 0x00000010
+#define NV50_3D_CLIP_DISTANCE_MODE_2__MASK 0x00000100
+#define NV50_3D_CLIP_DISTANCE_MODE_2__SHIFT 8
+#define NV50_3D_CLIP_DISTANCE_MODE_2_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_2_CULL 0x00000100
+#define NV50_3D_CLIP_DISTANCE_MODE_3__MASK 0x00001000
+#define NV50_3D_CLIP_DISTANCE_MODE_3__SHIFT 12
+#define NV50_3D_CLIP_DISTANCE_MODE_3_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_3_CULL 0x00001000
+#define NV50_3D_CLIP_DISTANCE_MODE_4__MASK 0x00010000
+#define NV50_3D_CLIP_DISTANCE_MODE_4__SHIFT 16
+#define NV50_3D_CLIP_DISTANCE_MODE_4_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_4_CULL 0x00010000
+#define NV50_3D_CLIP_DISTANCE_MODE_5__MASK 0x00100000
+#define NV50_3D_CLIP_DISTANCE_MODE_5__SHIFT 20
+#define NV50_3D_CLIP_DISTANCE_MODE_5_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_5_CULL 0x00100000
+#define NV50_3D_CLIP_DISTANCE_MODE_6__MASK 0x01000000
+#define NV50_3D_CLIP_DISTANCE_MODE_6__SHIFT 24
+#define NV50_3D_CLIP_DISTANCE_MODE_6_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_6_CULL 0x01000000
+#define NV50_3D_CLIP_DISTANCE_MODE_7__MASK 0x10000000
+#define NV50_3D_CLIP_DISTANCE_MODE_7__SHIFT 28
+#define NV50_3D_CLIP_DISTANCE_MODE_7_CLIP 0x00000000
+#define NV50_3D_CLIP_DISTANCE_MODE_7_CULL 0x10000000
+
+#define NVA3_3D_UNK1944 0x00001944
+
+#define NV50_3D_CLIP_RECTS_EN 0x0000194c
+
+#define NV50_3D_CLIP_RECTS_MODE 0x00001950
+#define NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000
+#define NV50_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001
+#define NV50_3D_CLIP_RECTS_MODE_NEVER 0x00000002
+
+#define NV50_3D_ZCULL_VALIDATE 0x00001954
+#define NV50_3D_ZCULL_VALIDATE_CLEAR_UNK0 0x00000001
+#define NV50_3D_ZCULL_VALIDATE_CLEAR_UNK1 0x00000010
+
+#define NV50_3D_ZCULL_INVALIDATE 0x00001958
+
+#define NVA3_3D_UNK1960 0x00001960
+#define NVA3_3D_UNK1960_0 0x00000001
+#define NVA3_3D_UNK1960_1 0x00000010
+
+#define NV50_3D_UNK1968 0x00001968
+#define NV50_3D_UNK1968_0 0x00000001
+#define NV50_3D_UNK1968_1 0x00000010
+
+#define NV50_3D_FP_CTRL_UNK196C 0x0000196c
+#define NV50_3D_FP_CTRL_UNK196C_0 0x00000001
+#define NV50_3D_FP_CTRL_UNK196C_1 0x00000010
+
+#define NV50_3D_UNK1978 0x00001978
+
+#define NV50_3D_CLIPID_ENABLE 0x0000197c
+
+#define NV50_3D_CLIPID_WIDTH 0x00001980
+#define NV50_3D_CLIPID_WIDTH__MAX 0x00002000
+#define NV50_3D_CLIPID_WIDTH__ALIGN 0x00000040
+
+#define NV50_3D_CLIPID_ID 0x00001984
+
+#define NV50_3D_FP_INTERPOLANT_CTRL 0x00001988
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK__MASK 0xff000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK__SHIFT 24
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_X 0x01000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_Y 0x02000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_Z 0x04000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_UMASK_W 0x08000000
+#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__MASK 0x00ff0000
+#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT 16
+#define NV50_3D_FP_INTERPOLANT_CTRL_OFFSET__MASK 0x0000ff00
+#define NV50_3D_FP_INTERPOLANT_CTRL_OFFSET__SHIFT 8
+#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT__MASK 0x000000ff
+#define NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT 0
+
+#define NV50_3D_FP_REG_ALLOC_TEMP 0x0000198c
+
+#define NV50_3D_REG_MODE 0x000019a0
+#define NV50_3D_REG_MODE_PACKED 0x00000001
+#define NV50_3D_REG_MODE_STRIPED 0x00000002
+
+#define NV50_3D_FP_CONTROL 0x000019a8
+#define NV50_3D_FP_CONTROL_MULTIPLE_RESULTS 0x00000001
+#define NV50_3D_FP_CONTROL_EXPORTS_Z 0x00000100
+#define NV50_3D_FP_CONTROL_USES_KIL 0x00100000
+
+#define NV50_3D_DEPTH_BOUNDS_EN 0x000019bc
+
+#define NV50_3D_UNK19C0 0x000019c0
+
+#define NV50_3D_LOGIC_OP_ENABLE 0x000019c4
+
+#define NV50_3D_LOGIC_OP 0x000019c8
+#define NV50_3D_LOGIC_OP_CLEAR 0x00001500
+#define NV50_3D_LOGIC_OP_AND 0x00001501
+#define NV50_3D_LOGIC_OP_AND_REVERSE 0x00001502
+#define NV50_3D_LOGIC_OP_COPY 0x00001503
+#define NV50_3D_LOGIC_OP_AND_INVERTED 0x00001504
+#define NV50_3D_LOGIC_OP_NOOP 0x00001505
+#define NV50_3D_LOGIC_OP_XOR 0x00001506
+#define NV50_3D_LOGIC_OP_OR 0x00001507
+#define NV50_3D_LOGIC_OP_NOR 0x00001508
+#define NV50_3D_LOGIC_OP_EQUIV 0x00001509
+#define NV50_3D_LOGIC_OP_INVERT 0x0000150a
+#define NV50_3D_LOGIC_OP_OR_REVERSE 0x0000150b
+#define NV50_3D_LOGIC_OP_COPY_INVERTED 0x0000150c
+#define NV50_3D_LOGIC_OP_OR_INVERTED 0x0000150d
+#define NV50_3D_LOGIC_OP_NAND 0x0000150e
+#define NV50_3D_LOGIC_OP_SET 0x0000150f
+
+#define NV50_3D_ZETA_COMP_ENABLE 0x000019cc
+
+#define NV50_3D_CLEAR_BUFFERS 0x000019d0
+#define NV50_3D_CLEAR_BUFFERS_Z 0x00000001
+#define NV50_3D_CLEAR_BUFFERS_S 0x00000002
+#define NV50_3D_CLEAR_BUFFERS_R 0x00000004
+#define NV50_3D_CLEAR_BUFFERS_G 0x00000008
+#define NV50_3D_CLEAR_BUFFERS_B 0x00000010
+#define NV50_3D_CLEAR_BUFFERS_A 0x00000020
+#define NV50_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0
+#define NV50_3D_CLEAR_BUFFERS_RT__SHIFT 6
+#define NV50_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00
+#define NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT 10
+
+#define NV50_3D_CLIPID_FILL 0x000019d4
+
+#define NV50_3D_UNK19D8(i0) (0x000019d8 + 0x4*(i0))
+#define NV50_3D_UNK19D8__ESIZE 0x00000004
+#define NV50_3D_UNK19D8__LEN 0x00000002
+
+#define NV50_3D_RT_COMP_ENABLE(i0) (0x000019e0 + 0x4*(i0))
+#define NV50_3D_RT_COMP_ENABLE__ESIZE 0x00000004
+#define NV50_3D_RT_COMP_ENABLE__LEN 0x00000008
+
+#define NV50_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0))
+#define NV50_3D_COLOR_MASK__ESIZE 0x00000004
+#define NV50_3D_COLOR_MASK__LEN 0x00000008
+#define NV50_3D_COLOR_MASK_R 0x0000000f
+#define NV50_3D_COLOR_MASK_G 0x000000f0
+#define NV50_3D_COLOR_MASK_B 0x00000f00
+#define NV50_3D_COLOR_MASK_A 0x0000f000
+
+#define NV50_3D_UNK1A20 0x00001a20
+
+#define NV50_3D_DELAY 0x00001a24
+
+#define NV50_3D_UNK1A28 0x00001a28
+#define NV50_3D_UNK1A28_0__MASK 0x000000ff
+#define NV50_3D_UNK1A28_0__SHIFT 0
+#define NV50_3D_UNK1A28_1 0x00000100
+
+#define NV50_3D_UNK1A2C 0x00001a2c
+
+#define NV50_3D_UNK1A30 0x00001a30
+
+#define NV50_3D_UNK1A34 0x00001a34
+
+#define NV50_3D_UNK1A38 0x00001a38
+
+#define NV50_3D_UNK1A3C 0x00001a3c
+
+#define NV50_3D_UNK1A40(i0) (0x00001a40 + 0x4*(i0))
+#define NV50_3D_UNK1A40__ESIZE 0x00000004
+#define NV50_3D_UNK1A40__LEN 0x00000010
+#define NV50_3D_UNK1A40_0__MASK 0x00000007
+#define NV50_3D_UNK1A40_0__SHIFT 0
+#define NV50_3D_UNK1A40_1__MASK 0x00000070
+#define NV50_3D_UNK1A40_1__SHIFT 4
+#define NV50_3D_UNK1A40_2__MASK 0x00000700
+#define NV50_3D_UNK1A40_2__SHIFT 8
+#define NV50_3D_UNK1A40_3__MASK 0x00007000
+#define NV50_3D_UNK1A40_3__SHIFT 12
+#define NV50_3D_UNK1A40_4__MASK 0x00070000
+#define NV50_3D_UNK1A40_4__SHIFT 16
+#define NV50_3D_UNK1A40_5__MASK 0x00700000
+#define NV50_3D_UNK1A40_5__SHIFT 20
+#define NV50_3D_UNK1A40_6__MASK 0x07000000
+#define NV50_3D_UNK1A40_6__SHIFT 24
+#define NV50_3D_UNK1A40_7__MASK 0x70000000
+#define NV50_3D_UNK1A40_7__SHIFT 28
+
+#define NV50_3D_STRMOUT_ADDRESS_HIGH(i0) (0x00001a80 + 0x10*(i0))
+#define NV50_3D_STRMOUT_ADDRESS_HIGH__ESIZE 0x00000010
+#define NV50_3D_STRMOUT_ADDRESS_HIGH__LEN 0x00000004
+
+#define NV50_3D_STRMOUT_ADDRESS_LOW(i0) (0x00001a84 + 0x10*(i0))
+#define NV50_3D_STRMOUT_ADDRESS_LOW__ESIZE 0x00000010
+#define NV50_3D_STRMOUT_ADDRESS_LOW__LEN 0x00000004
+
+#define NV50_3D_STRMOUT_NUM_ATTRIBS(i0) (0x00001a88 + 0x10*(i0))
+#define NV50_3D_STRMOUT_NUM_ATTRIBS__ESIZE 0x00000010
+#define NV50_3D_STRMOUT_NUM_ATTRIBS__LEN 0x00000004
+#define NV50_3D_STRMOUT_NUM_ATTRIBS__MAX 0x00000040
+
+#define NVA0_3D_STRMOUT_OFFSET_LIMIT(i0) (0x00001a8c + 0x10*(i0))
+#define NVA0_3D_STRMOUT_OFFSET_LIMIT__ESIZE 0x00000010
+#define NVA0_3D_STRMOUT_OFFSET_LIMIT__LEN 0x00000004
+
+#define NV50_3D_VERTEX_ARRAY_ATTRIB(i0) (0x00001ac0 + 0x4*(i0))
+#define NV50_3D_VERTEX_ARRAY_ATTRIB__ESIZE 0x00000004
+#define NV50_3D_VERTEX_ARRAY_ATTRIB__LEN 0x00000010
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_BUFFER__MASK 0x0000000f
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_BUFFER__SHIFT 0
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_CONST 0x00000010
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_OFFSET__MASK 0x0007ffe0
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_OFFSET__SHIFT 5
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT__MASK 0x01f80000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT__SHIFT 19
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 0x00080000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32 0x00100000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16 0x00180000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32 0x00200000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16 0x00280000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8 0x00500000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16_16 0x00780000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32 0x00900000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8 0x00980000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8_8 0x00c00000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_16 0x00d80000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_8 0x00e80000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_10_10_10_2 0x01800000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE__MASK 0x7e000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE__SHIFT 25
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x7e000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_UNORM 0x24000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SNORM 0x12000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_USCALED 0x5a000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED 0x6c000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_UINT 0x48000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_SINT 0x36000000
+#define NV50_3D_VERTEX_ARRAY_ATTRIB_BGRA 0x80000000
+
+#define NV50_3D_QUERY_ADDRESS_HIGH 0x00001b00
+
+#define NV50_3D_QUERY_ADDRESS_LOW 0x00001b04
+
+#define NV50_3D_QUERY_SEQUENCE 0x00001b08
+
+#define NV50_3D_QUERY_GET 0x00001b0c
+#define NV50_3D_QUERY_GET_MODE__MASK 0x00000003
+#define NV50_3D_QUERY_GET_MODE__SHIFT 0
+#define NV50_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000
+#define NV50_3D_QUERY_GET_MODE_SYNC 0x00000001
+#define NV50_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002
+#define NV50_3D_QUERY_GET_UNK4 0x00000010
+#define NVA0_3D_QUERY_GET_INDEX__MASK 0x000000e0
+#define NVA0_3D_QUERY_GET_INDEX__SHIFT 5
+#define NV50_3D_QUERY_GET_UNK8 0x00000100
+#define NV50_3D_QUERY_GET_UNIT__MASK 0x0000f000
+#define NV50_3D_QUERY_GET_UNIT__SHIFT 12
+#define NV50_3D_QUERY_GET_UNIT_UNK00 0x00000000
+#define NV50_3D_QUERY_GET_UNIT_VFETCH 0x00001000
+#define NV50_3D_QUERY_GET_UNIT_VP 0x00002000
+#define NV50_3D_QUERY_GET_UNIT_RAST 0x00004000
+#define NV50_3D_QUERY_GET_UNIT_STRMOUT 0x00005000
+#define NV50_3D_QUERY_GET_UNIT_GP 0x00006000
+#define NV50_3D_QUERY_GET_UNIT_ZCULL 0x00007000
+#define NV50_3D_QUERY_GET_UNIT_TPROP 0x0000a000
+#define NV50_3D_QUERY_GET_UNIT_UNK0C 0x0000c000
+#define NV50_3D_QUERY_GET_UNIT_CROP 0x0000f000
+#define NV50_3D_QUERY_GET_SYNC_COND__MASK 0x00010000
+#define NV50_3D_QUERY_GET_SYNC_COND__SHIFT 16
+#define NV50_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000
+#define NV50_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000
+#define NV50_3D_QUERY_GET_INTR 0x00100000
+#define NV50_3D_QUERY_GET_TYPE__MASK 0x00800000
+#define NV50_3D_QUERY_GET_TYPE__SHIFT 23
+#define NV50_3D_QUERY_GET_TYPE_QUERY 0x00000000
+#define NV50_3D_QUERY_GET_TYPE_COUNTER 0x00800000
+#define NV50_3D_QUERY_GET_QUERY_SELECT__MASK 0x0f000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT__SHIFT 24
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZERO 0x00000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_SAMPLECNT 0x01000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_STRMOUT_NO_OVERFLOW 0x02000000
+#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_DROPPED_PRIMITIVES 0x03000000
+#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_VERTICES 0x04000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK0 0x05000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK1 0x06000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK2 0x07000000
+#define NV50_3D_QUERY_GET_QUERY_SELECT_ZCULL_STAT_UNK3 0x08000000
+#define NVA0_3D_QUERY_GET_QUERY_SELECT_RT_UNK14 0x0c000000
+#define NVA0_3D_QUERY_GET_QUERY_SELECT_STRMOUT_OFFSET 0x0d000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT__MASK 0x0f000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT__SHIFT 24
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_VFETCH_VERTICES 0x00000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_VFETCH_PRIMITIVES 0x01000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_VP_LAUNCHES 0x02000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_GP_LAUNCHES 0x03000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_GP_PRIMITIVES_OUT 0x04000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_TRANSFORM_FEEDBACK 0x05000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_GENERATED_PRIMITIVES 0x06000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_RAST_PRIMITIVES_PRECLIP 0x07000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_RAST_PRIMITIVES_POSTCLIP 0x08000000
+#define NV50_3D_QUERY_GET_COUNTER_SELECT_FP_PIXELS 0x09000000
+#define NV84_3D_QUERY_GET_COUNTER_SELECT_UNK0A 0x0a000000
+#define NVA0_3D_QUERY_GET_COUNTER_SELECT_UNK0C 0x0c000000
+#define NV50_3D_QUERY_GET_SHORT 0x10000000
+
+#define NVA3_3D_VP_RESULT_MAP_ALT(i0) (0x00001b3c + 0x4*(i0))
+#define NVA3_3D_VP_RESULT_MAP_ALT__ESIZE 0x00000004
+#define NVA3_3D_VP_RESULT_MAP_ALT__LEN 0x00000020
+#define NVA3_3D_VP_RESULT_MAP_ALT_0__MASK 0x000000ff
+#define NVA3_3D_VP_RESULT_MAP_ALT_0__SHIFT 0
+#define NVA3_3D_VP_RESULT_MAP_ALT_1__MASK 0x0000ff00
+#define NVA3_3D_VP_RESULT_MAP_ALT_1__SHIFT 8
+#define NVA3_3D_VP_RESULT_MAP_ALT_2__MASK 0x00ff0000
+#define NVA3_3D_VP_RESULT_MAP_ALT_2__SHIFT 16
+#define NVA3_3D_VP_RESULT_MAP_ALT_3__MASK 0xff000000
+#define NVA3_3D_VP_RESULT_MAP_ALT_3__SHIFT 24
+
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT(i0) (0x00001c00 + 0x10*(i0))
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT__ESIZE 0x00000010
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT__LEN 0x00000020
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_STRIDE__MASK 0x00000fff
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_STRIDE__SHIFT 0
+#define NVA3_3D_VERTEX_ARRAY_FETCH_ALT_ENABLE 0x20000000
+
+#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT(i0) (0x00001c04 + 0x10*(i0))
+#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT__ESIZE 0x00000010
+#define NVA3_3D_VERTEX_ARRAY_START_HIGH_ALT__LEN 0x00000020
+
+#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT(i0) (0x00001c08 + 0x10*(i0))
+#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT__ESIZE 0x00000010
+#define NVA3_3D_VERTEX_ARRAY_START_LOW_ALT__LEN 0x00000020
+
+#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT(i0) (0x00001c0c + 0x10*(i0))
+#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT__ESIZE 0x00000010
+#define NVA3_3D_VERTEX_ARRAY_DIVISOR_ALT__LEN 0x00000020
+
+#define NVA3_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0))
+#define NVA3_3D_IBLEND__ESIZE 0x00000020
+#define NVA3_3D_IBLEND__LEN 0x00000008
+
+#define NVA3_3D_IBLEND_SEPARATE_ALPHA(i0) (0x00001e00 + 0x20*(i0))
+
+#define NVA3_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0))
+#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NVA3_3D_IBLEND_EQUATION_RGB_MIN 0x00008007
+#define NVA3_3D_IBLEND_EQUATION_RGB_MAX 0x00008008
+#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NVA3_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVA3_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0))
+
+#define NVA3_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0))
+
+#define NVA3_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0))
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NVA3_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVA3_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0))
+
+#define NVA3_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0))
+
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT(i0) (0x00001f00 + 0x8*(i0))
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT__ESIZE 0x00000008
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_HIGH_ALT__LEN 0x00000020
+
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT(i0) (0x00001f04 + 0x8*(i0))
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT__ESIZE 0x00000008
+#define NVA3_3D_VERTEX_ARRAY_LIMIT_LOW_ALT__LEN 0x00000020
+
+
+#endif /* RNNDB_NV50_3D_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_3ddefs.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_3ddefs.xml.h
new file mode 100644
index 00000000000..f26ac45da40
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_3ddefs.xml.h
@@ -0,0 +1,98 @@
+#ifndef NV_3DDEFS_XML
+#define NV_3DDEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nv50_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38)
+- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28)
+- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20)
+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Koƛcielnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000
+#define NV50_3D_BLEND_FACTOR_ONE 0x00004001
+#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305
+#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308
+#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903
+
+#endif /* NV_3DDEFS_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_blit.h b/src/gallium/drivers/nouveau/nv50/nv50_blit.h
new file mode 100644
index 00000000000..bdd6a63d1f1
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_blit.h
@@ -0,0 +1,223 @@
+
+#ifndef __NV50_BLIT_H__
+#define __NV50_BLIT_H__
+
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+void *
+nv50_blitter_make_fp(struct pipe_context *,
+ unsigned mode,
+ enum pipe_texture_target);
+
+unsigned
+nv50_blit_select_mode(const struct pipe_blit_info *);
+
+/* Converted to a pipe->blit. */
+void
+nv50_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *);
+
+#define NV50_BLIT_MODE_PASS 0 /* pass through TEX $t0/$s0 output */
+#define NV50_BLIT_MODE_Z24S8 1 /* encode ZS values for RGBA unorm8 */
+#define NV50_BLIT_MODE_S8Z24 2
+#define NV50_BLIT_MODE_X24S8 3
+#define NV50_BLIT_MODE_S8X24 4
+#define NV50_BLIT_MODE_Z24X8 5
+#define NV50_BLIT_MODE_X8Z24 6
+#define NV50_BLIT_MODE_ZS 7 /* put $t0/$s0 into R, $t1/$s1 into G */
+#define NV50_BLIT_MODE_XS 8 /* put $t1/$s1 into G */
+#define NV50_BLIT_MODES 9
+
+/* CUBE and RECT textures are reinterpreted as 2D(_ARRAY) */
+#define NV50_BLIT_TEXTURE_BUFFER 0
+#define NV50_BLIT_TEXTURE_1D 1
+#define NV50_BLIT_TEXTURE_2D 2
+#define NV50_BLIT_TEXTURE_3D 3
+#define NV50_BLIT_TEXTURE_1D_ARRAY 4
+#define NV50_BLIT_TEXTURE_2D_ARRAY 5
+#define NV50_BLIT_MAX_TEXTURE_TYPES 6
+
+static INLINE unsigned
+nv50_blit_texture_type(enum pipe_texture_target target)
+{
+ switch (target) {
+ case PIPE_TEXTURE_1D: return NV50_BLIT_TEXTURE_1D;
+ case PIPE_TEXTURE_2D: return NV50_BLIT_TEXTURE_2D;
+ case PIPE_TEXTURE_3D: return NV50_BLIT_TEXTURE_3D;
+ case PIPE_TEXTURE_1D_ARRAY: return NV50_BLIT_TEXTURE_1D_ARRAY;
+ case PIPE_TEXTURE_2D_ARRAY: return NV50_BLIT_TEXTURE_2D_ARRAY;
+ default:
+ assert(target == PIPE_BUFFER);
+ return NV50_BLIT_TEXTURE_BUFFER;
+ }
+}
+
+static INLINE unsigned
+nv50_blit_get_tgsi_texture_target(enum pipe_texture_target target)
+{
+ switch (target) {
+ case PIPE_TEXTURE_1D: return TGSI_TEXTURE_1D;
+ case PIPE_TEXTURE_2D: return TGSI_TEXTURE_2D;
+ case PIPE_TEXTURE_3D: return TGSI_TEXTURE_3D;
+ case PIPE_TEXTURE_1D_ARRAY: return TGSI_TEXTURE_1D_ARRAY;
+ case PIPE_TEXTURE_2D_ARRAY: return TGSI_TEXTURE_2D_ARRAY;
+ default:
+ assert(target == PIPE_BUFFER);
+ return TGSI_TEXTURE_BUFFER;
+ }
+}
+
+static INLINE enum pipe_texture_target
+nv50_blit_reinterpret_pipe_texture_target(enum pipe_texture_target target)
+{
+ switch (target) {
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ return PIPE_TEXTURE_2D_ARRAY;
+ case PIPE_TEXTURE_RECT:
+ return PIPE_TEXTURE_2D;
+ default:
+ return target;
+ }
+}
+
+static INLINE unsigned
+nv50_blit_get_filter(const struct pipe_blit_info *info)
+{
+ if (info->dst.resource->nr_samples < info->src.resource->nr_samples)
+ return util_format_is_depth_or_stencil(info->src.format) ? 0 : 1;
+
+ if (info->filter != PIPE_TEX_FILTER_LINEAR)
+ return 0;
+
+ if ((info->dst.box.width == info->src.box.width ||
+ info->dst.box.width == -info->src.box.width) &&
+ (info->dst.box.height == info->src.box.height ||
+ info->dst.box.height == -info->src.box.height))
+ return 0;
+
+ return 1;
+}
+
+/* Since shaders cannot export stencil, we cannot copy stencil values when
+ * rendering to ZETA, so we attach the ZS surface to a colour render target.
+ */
+static INLINE enum pipe_format
+nv50_blit_zeta_to_colour_format(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ return PIPE_FORMAT_R16_UNORM;
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return PIPE_FORMAT_R8G8B8A8_UNORM;
+ case PIPE_FORMAT_Z32_FLOAT:
+ return PIPE_FORMAT_R32_FLOAT;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return PIPE_FORMAT_R32G32_FLOAT;
+ default:
+ assert(0);
+ return PIPE_FORMAT_NONE;
+ }
+}
+
+
+static INLINE uint16_t
+nv50_blit_derive_color_mask(const struct pipe_blit_info *info)
+{
+ const unsigned mask = info->mask;
+
+ uint16_t color_mask = 0;
+
+ switch (info->dst.format) {
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ if (mask & PIPE_MASK_S)
+ color_mask |= 0x1000;
+ /* fall through */
+ case PIPE_FORMAT_Z24X8_UNORM:
+ if (mask & PIPE_MASK_Z)
+ color_mask |= 0x0111;
+ break;
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ if (mask & PIPE_MASK_Z)
+ color_mask |= 0x1110;
+ if (mask & PIPE_MASK_S)
+ color_mask |= 0x0001;
+ break;
+ default:
+ if (mask & (PIPE_MASK_R | PIPE_MASK_Z)) color_mask |= 0x0001;
+ if (mask & (PIPE_MASK_G | PIPE_MASK_S)) color_mask |= 0x0010;
+ if (mask & PIPE_MASK_B) color_mask |= 0x0100;
+ if (mask & PIPE_MASK_A) color_mask |= 0x1000;
+ break;
+ }
+
+ return color_mask;
+}
+
+static INLINE uint32_t
+nv50_blit_eng2d_get_mask(const struct pipe_blit_info *info)
+{
+ uint32_t mask = 0;
+
+ switch (info->dst.format) {
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ if (info->mask & PIPE_MASK_Z) mask |= 0x00ffffff;
+ if (info->mask & PIPE_MASK_S) mask |= 0xff000000;
+ break;
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ if (info->mask & PIPE_MASK_Z) mask |= 0xffffff00;
+ if (info->mask & PIPE_MASK_S) mask |= 0x000000ff;
+ break;
+ case PIPE_FORMAT_X8Z24_UNORM:
+ if (info->mask & PIPE_MASK_Z) mask = 0x00ffffff;
+ break;
+ default:
+ mask = 0xffffffff;
+ break;
+ }
+ return mask;
+}
+
+#if NOUVEAU_DRIVER == 0xc0
+# define nv50_format_table nvc0_format_table
+#endif
+
+/* return TRUE for formats that can be converted among each other by NVC0_2D */
+static INLINE boolean
+nv50_2d_dst_format_faithful(enum pipe_format format)
+{
+ const uint64_t mask =
+ NV50_ENG2D_SUPPORTED_FORMATS &
+ ~NV50_ENG2D_NOCONVERT_FORMATS;
+ uint8_t id = nv50_format_table[format].rt;
+ return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0)));
+}
+static INLINE boolean
+nv50_2d_src_format_faithful(enum pipe_format format)
+{
+ const uint64_t mask =
+ NV50_ENG2D_SUPPORTED_FORMATS &
+ ~(NV50_ENG2D_LUMINANCE_FORMATS | NV50_ENG2D_INTENSITY_FORMATS);
+ uint8_t id = nv50_format_table[format].rt;
+ return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0)));
+}
+
+static INLINE boolean
+nv50_2d_format_supported(enum pipe_format format)
+{
+ uint8_t id = nv50_format_table[format].rt;
+ return (id >= 0xc0) &&
+ (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)));
+}
+
+static INLINE boolean
+nv50_2d_dst_format_ops_supported(enum pipe_format format)
+{
+ uint8_t id = nv50_format_table[format].rt;
+ return (id >= 0xc0) &&
+ (NV50_ENG2D_OPERATION_FORMATS & (1ULL << (id - 0xc0)));
+}
+
+#endif /* __NV50_BLIT_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
new file mode 100644
index 00000000000..b6bdf79b389
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -0,0 +1,317 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_framebuffer.h"
+
+#ifdef NV50_WITH_DRAW_MODULE
+#include "draw/draw_context.h"
+#endif
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_screen.h"
+#include "nv50/nv50_resource.h"
+
+static void
+nv50_flush(struct pipe_context *pipe,
+ struct pipe_fence_handle **fence,
+ unsigned flags)
+{
+ struct nouveau_screen *screen = nouveau_screen(pipe->screen);
+
+ if (fence)
+ nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);
+
+ PUSH_KICK(screen->pushbuf);
+
+ nouveau_context_update_frame_stats(nouveau_context(pipe));
+}
+
+static void
+nv50_texture_barrier(struct pipe_context *pipe)
+{
+ struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
+
+ BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
+ PUSH_DATA (push, 0x20);
+}
+
+void
+nv50_default_kick_notify(struct nouveau_pushbuf *push)
+{
+ struct nv50_screen *screen = push->user_priv;
+
+ if (screen) {
+ nouveau_fence_next(&screen->base);
+ nouveau_fence_update(&screen->base, TRUE);
+ if (screen->cur_ctx)
+ screen->cur_ctx->state.flushed = TRUE;
+ }
+}
+
+static void
+nv50_context_unreference_resources(struct nv50_context *nv50)
+{
+ unsigned s, i;
+
+ nouveau_bufctx_del(&nv50->bufctx_3d);
+ nouveau_bufctx_del(&nv50->bufctx);
+
+ util_unreference_framebuffer_state(&nv50->framebuffer);
+
+ for (i = 0; i < nv50->num_vtxbufs; ++i)
+ pipe_resource_reference(&nv50->vtxbuf[i].buffer, NULL);
+
+ pipe_resource_reference(&nv50->idxbuf.buffer, NULL);
+
+ for (s = 0; s < 3; ++s) {
+ for (i = 0; i < nv50->num_textures[s]; ++i)
+ pipe_sampler_view_reference(&nv50->textures[s][i], NULL);
+
+ for (i = 0; i < NV50_MAX_PIPE_CONSTBUFS; ++i)
+ if (!nv50->constbuf[s][i].user)
+ pipe_resource_reference(&nv50->constbuf[s][i].u.buf, NULL);
+ }
+}
+
+static void
+nv50_destroy(struct pipe_context *pipe)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ if (nv50_context_screen(nv50)->cur_ctx == nv50) {
+ nv50->base.pushbuf->kick_notify = NULL;
+ nv50_context_screen(nv50)->cur_ctx = NULL;
+ nouveau_pushbuf_bufctx(nv50->base.pushbuf, NULL);
+ }
+ /* need to flush before destroying the bufctx */
+ nouveau_pushbuf_kick(nv50->base.pushbuf, nv50->base.pushbuf->channel);
+
+ nv50_context_unreference_resources(nv50);
+
+#ifdef NV50_WITH_DRAW_MODULE
+ draw_destroy(nv50->draw);
+#endif
+
+ nouveau_context_destroy(&nv50->base);
+}
+
+static int
+nv50_invalidate_resource_storage(struct nouveau_context *ctx,
+ struct pipe_resource *res,
+ int ref)
+{
+ struct nv50_context *nv50 = nv50_context(&ctx->pipe);
+ unsigned s, i;
+
+ if (res->bind & PIPE_BIND_RENDER_TARGET) {
+ for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) {
+ if (nv50->framebuffer.cbufs[i] &&
+ nv50->framebuffer.cbufs[i]->texture == res) {
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (nv50->framebuffer.zsbuf &&
+ nv50->framebuffer.zsbuf->texture == res) {
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ if (!--ref)
+ return ref;
+ }
+ }
+
+ if (res->bind & PIPE_BIND_VERTEX_BUFFER) {
+ for (i = 0; i < nv50->num_vtxbufs; ++i) {
+ if (nv50->vtxbuf[i].buffer == res) {
+ nv50->dirty |= NV50_NEW_ARRAYS;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ if (res->bind & PIPE_BIND_INDEX_BUFFER) {
+ if (nv50->idxbuf.buffer == res)
+ if (!--ref)
+ return ref;
+ }
+
+ if (res->bind & PIPE_BIND_SAMPLER_VIEW) {
+ for (s = 0; s < 5; ++s) {
+ for (i = 0; i < nv50->num_textures[s]; ++i) {
+ if (nv50->textures[s][i] &&
+ nv50->textures[s][i]->texture == res) {
+ nv50->dirty |= NV50_NEW_TEXTURES;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ }
+
+ if (res->bind & PIPE_BIND_CONSTANT_BUFFER) {
+ for (s = 0; s < 5; ++s) {
+ for (i = 0; i < nv50->num_vtxbufs; ++i) {
+ if (!nv50->constbuf[s][i].user &&
+ nv50->constbuf[s][i].u.buf == res) {
+ nv50->dirty |= NV50_NEW_CONSTBUF;
+ nv50->constbuf_dirty[s] |= 1 << i;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i));
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ }
+
+ return ref;
+}
+
+struct pipe_context *
+nv50_create(struct pipe_screen *pscreen, void *priv)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ struct nv50_context *nv50;
+ struct pipe_context *pipe;
+ int ret;
+ uint32_t flags;
+
+ nv50 = CALLOC_STRUCT(nv50_context);
+ if (!nv50)
+ return NULL;
+ pipe = &nv50->base.pipe;
+
+ if (!nv50_blitctx_create(nv50))
+ goto out_err;
+
+ nv50->base.pushbuf = screen->base.pushbuf;
+ nv50->base.client = screen->base.client;
+
+ ret = nouveau_bufctx_new(screen->base.client, NV50_BIND_COUNT,
+ &nv50->bufctx_3d);
+ if (!ret)
+ ret = nouveau_bufctx_new(screen->base.client, 2, &nv50->bufctx);
+ if (ret)
+ goto out_err;
+
+ nv50->base.screen = &screen->base;
+ nv50->base.copy_data = nv50_m2mf_copy_linear;
+ nv50->base.push_data = nv50_sifc_linear_u8;
+ nv50->base.push_cb = nv50_cb_push;
+
+ nv50->screen = screen;
+ pipe->screen = pscreen;
+ pipe->priv = priv;
+
+ pipe->destroy = nv50_destroy;
+
+ pipe->draw_vbo = nv50_draw_vbo;
+ pipe->clear = nv50_clear;
+
+ pipe->flush = nv50_flush;
+ pipe->texture_barrier = nv50_texture_barrier;
+
+ if (!screen->cur_ctx) {
+ screen->cur_ctx = nv50;
+ nouveau_pushbuf_bufctx(screen->base.pushbuf, nv50->bufctx);
+ }
+ nv50->base.pushbuf->kick_notify = nv50_default_kick_notify;
+
+ nv50_init_query_functions(nv50);
+ nv50_init_surface_functions(nv50);
+ nv50_init_state_functions(nv50);
+ nv50_init_resource_functions(pipe);
+
+ nv50->base.invalidate_resource_storage = nv50_invalidate_resource_storage;
+
+#ifdef NV50_WITH_DRAW_MODULE
+ /* no software fallbacks implemented */
+ nv50->draw = draw_create(pipe);
+ assert(nv50->draw);
+ draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50));
+#endif
+
+ if (screen->base.device->chipset < 0x84 ||
+ debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) {
+ /* PMPEG */
+ nouveau_context_init_vdec(&nv50->base);
+ } else if (screen->base.device->chipset < 0x98 ||
+ screen->base.device->chipset == 0xa0) {
+ /* VP2 */
+ pipe->create_video_codec = nv84_create_decoder;
+ pipe->create_video_buffer = nv84_video_buffer_create;
+ } else {
+ /* VP3/4 */
+ pipe->create_video_codec = nv98_create_decoder;
+ pipe->create_video_buffer = nv98_video_buffer_create;
+ }
+
+ flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+
+ BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->code);
+ BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms);
+ BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc);
+ BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo);
+
+ flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
+
+ BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo);
+ BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo);
+
+ nv50->base.scratch.bo_size = 2 << 20;
+
+ return pipe;
+
+out_err:
+ if (nv50) {
+ if (nv50->bufctx_3d)
+ nouveau_bufctx_del(&nv50->bufctx_3d);
+ if (nv50->bufctx)
+ nouveau_bufctx_del(&nv50->bufctx);
+ if (nv50->blit)
+ FREE(nv50->blit);
+ FREE(nv50);
+ }
+ return NULL;
+}
+
+void
+nv50_bufctx_fence(struct nouveau_bufctx *bufctx, boolean on_flush)
+{
+ struct nouveau_list *list = on_flush ? &bufctx->current : &bufctx->pending;
+ struct nouveau_list *it;
+
+ for (it = list->next; it != list; it = it->next) {
+ struct nouveau_bufref *ref = (struct nouveau_bufref *)it;
+ struct nv04_resource *res = ref->priv;
+ if (res)
+ nv50_resource_validate(res, (unsigned)ref->priv_data);
+ }
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
new file mode 100644
index 00000000000..ee6eb0ef715
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -0,0 +1,322 @@
+#ifndef __NV50_CONTEXT_H__
+#define __NV50_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_inlines.h"
+#include "util/u_dynarray.h"
+
+#ifdef NV50_WITH_DRAW_MODULE
+#include "draw/draw_vertex.h"
+#endif
+
+#include "nv50/nv50_debug.h"
+#include "nv50/nv50_winsys.h"
+#include "nv50/nv50_stateobj.h"
+#include "nv50/nv50_screen.h"
+#include "nv50/nv50_program.h"
+#include "nv50/nv50_resource.h"
+#include "nv50/nv50_transfer.h"
+
+#include "nouveau_context.h"
+#include "nv_object.xml.h"
+#include "nv_m2mf.xml.h"
+#include "nv50/nv50_3ddefs.xml.h"
+#include "nv50/nv50_3d.xml.h"
+#include "nv50/nv50_2d.xml.h"
+
+#define NV50_NEW_BLEND (1 << 0)
+#define NV50_NEW_RASTERIZER (1 << 1)
+#define NV50_NEW_ZSA (1 << 2)
+#define NV50_NEW_VERTPROG (1 << 3)
+#define NV50_NEW_GMTYPROG (1 << 6)
+#define NV50_NEW_FRAGPROG (1 << 7)
+#define NV50_NEW_BLEND_COLOUR (1 << 8)
+#define NV50_NEW_STENCIL_REF (1 << 9)
+#define NV50_NEW_CLIP (1 << 10)
+#define NV50_NEW_SAMPLE_MASK (1 << 11)
+#define NV50_NEW_FRAMEBUFFER (1 << 12)
+#define NV50_NEW_STIPPLE (1 << 13)
+#define NV50_NEW_SCISSOR (1 << 14)
+#define NV50_NEW_VIEWPORT (1 << 15)
+#define NV50_NEW_ARRAYS (1 << 16)
+#define NV50_NEW_VERTEX (1 << 17)
+#define NV50_NEW_CONSTBUF (1 << 18)
+#define NV50_NEW_TEXTURES (1 << 19)
+#define NV50_NEW_SAMPLERS (1 << 20)
+#define NV50_NEW_STRMOUT (1 << 21)
+#define NV50_NEW_CONTEXT (1 << 31)
+
+#define NV50_BIND_FB 0
+#define NV50_BIND_VERTEX 1
+#define NV50_BIND_VERTEX_TMP 2
+#define NV50_BIND_INDEX 3
+#define NV50_BIND_TEXTURES 4
+#define NV50_BIND_CB(s, i) (5 + 16 * (s) + (i))
+#define NV50_BIND_SO 53
+#define NV50_BIND_SCREEN 54
+#define NV50_BIND_TLS 55
+#define NV50_BIND_COUNT 56
+#define NV50_BIND_2D 0
+#define NV50_BIND_M2MF 0
+#define NV50_BIND_FENCE 1
+
+#define NV50_CB_TMP 123
+/* fixed constant buffer binding points - low indices for user's constbufs */
+#define NV50_CB_PVP 124
+#define NV50_CB_PGP 126
+#define NV50_CB_PFP 125
+#define NV50_CB_AUX 127
+
+
+struct nv50_blitctx;
+
+boolean nv50_blitctx_create(struct nv50_context *);
+
+struct nv50_context {
+ struct nouveau_context base;
+
+ struct nv50_screen *screen;
+
+ struct nouveau_bufctx *bufctx_3d;
+ struct nouveau_bufctx *bufctx;
+
+ uint32_t dirty;
+
+ struct {
+ uint32_t instance_elts; /* bitmask of per-instance elements */
+ uint32_t instance_base;
+ uint32_t interpolant_ctrl;
+ uint32_t semantic_color;
+ uint32_t semantic_psize;
+ int32_t index_bias;
+ boolean uniform_buffer_bound[3];
+ boolean prim_restart;
+ boolean point_sprite;
+ boolean rt_serialize;
+ boolean flushed;
+ boolean rasterizer_discard;
+ uint8_t tls_required;
+ boolean new_tls_space;
+ uint8_t num_vtxbufs;
+ uint8_t num_vtxelts;
+ uint8_t num_textures[3];
+ uint8_t num_samplers[3];
+ uint8_t prim_size;
+ uint16_t scissor;
+ } state;
+
+ struct nv50_blend_stateobj *blend;
+ struct nv50_rasterizer_stateobj *rast;
+ struct nv50_zsa_stateobj *zsa;
+ struct nv50_vertex_stateobj *vertex;
+
+ struct nv50_program *vertprog;
+ struct nv50_program *gmtyprog;
+ struct nv50_program *fragprog;
+
+ struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS];
+ uint16_t constbuf_dirty[3];
+ uint16_t constbuf_valid[3];
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned num_vtxbufs;
+ struct pipe_index_buffer idxbuf;
+ uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
+ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
+ uint32_t vbo_constant; /* bitmask of user buffers with stride 0 */
+ uint32_t vb_elt_first; /* from pipe_draw_info, for vertex upload */
+ uint32_t vb_elt_limit; /* max - min element (count - 1) */
+ uint32_t instance_off; /* base vertex for instanced arrays */
+ uint32_t instance_max; /* max instance for current draw call */
+
+ struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS];
+ unsigned num_textures[3];
+ struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS];
+ unsigned num_samplers[3];
+
+ uint8_t num_so_targets;
+ uint8_t so_targets_dirty;
+ struct pipe_stream_output_target *so_target[4];
+
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_blend_color blend_colour;
+ struct pipe_stencil_ref stencil_ref;
+ struct pipe_poly_stipple stipple;
+ struct pipe_scissor_state scissor;
+ struct pipe_viewport_state viewport;
+ struct pipe_clip_state clip;
+
+ unsigned sample_mask;
+
+ boolean vbo_push_hint;
+
+ struct pipe_query *cond_query;
+ boolean cond_cond;
+ uint cond_mode;
+
+ struct nv50_blitctx *blit;
+
+#ifdef NV50_WITH_DRAW_MODULE
+ struct draw_context *draw;
+#endif
+};
+
+static INLINE struct nv50_context *
+nv50_context(struct pipe_context *pipe)
+{
+ return (struct nv50_context *)pipe;
+}
+
+static INLINE struct nv50_screen *
+nv50_context_screen(struct nv50_context *nv50)
+{
+ return nv50_screen(&nv50->base.screen->base);
+}
+
+/* return index used in nv50_context arrays for a specific shader type */
+static INLINE unsigned
+nv50_context_shader_stage(unsigned pipe)
+{
+ switch (pipe) {
+ case PIPE_SHADER_VERTEX: return 0;
+ case PIPE_SHADER_FRAGMENT: return 1;
+ case PIPE_SHADER_GEOMETRY: return 2;
+ case PIPE_SHADER_COMPUTE: return 3;
+ default:
+ assert(!"invalid/unhandled shader type");
+ return 0;
+ }
+}
+
+/* nv50_context.c */
+struct pipe_context *nv50_create(struct pipe_screen *, void *);
+
+void nv50_bufctx_fence(struct nouveau_bufctx *, boolean on_flush);
+
+void nv50_default_kick_notify(struct nouveau_pushbuf *);
+
+/* nv50_draw.c */
+extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
+
+/* nv50_query.c */
+void nv50_init_query_functions(struct nv50_context *);
+void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
+ struct pipe_query *, unsigned result_offset);
+void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
+void nva0_so_target_save_offset(struct pipe_context *,
+ struct pipe_stream_output_target *,
+ unsigned index, boolean seralize);
+
+#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
+
+/* nv50_shader_state.c */
+void nv50_vertprog_validate(struct nv50_context *);
+void nv50_gmtyprog_validate(struct nv50_context *);
+void nv50_fragprog_validate(struct nv50_context *);
+void nv50_fp_linkage_validate(struct nv50_context *);
+void nv50_gp_linkage_validate(struct nv50_context *);
+void nv50_constbufs_validate(struct nv50_context *);
+void nv50_validate_derived_rs(struct nv50_context *);
+void nv50_stream_output_validate(struct nv50_context *);
+
+/* nv50_state.c */
+extern void nv50_init_state_functions(struct nv50_context *);
+
+/* nv50_state_validate.c */
+/* @words: check for space before emitting relocs */
+extern boolean nv50_state_validate(struct nv50_context *, uint32_t state_mask,
+ unsigned space_words);
+
+/* nv50_surface.c */
+extern void nv50_clear(struct pipe_context *, unsigned buffers,
+ const union pipe_color_union *color,
+ double depth, unsigned stencil);
+extern void nv50_init_surface_functions(struct nv50_context *);
+
+/* nv50_tex.c */
+void nv50_validate_textures(struct nv50_context *);
+void nv50_validate_samplers(struct nv50_context *);
+
+struct pipe_sampler_view *
+nv50_create_texture_view(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_sampler_view *,
+ uint32_t flags,
+ enum pipe_texture_target);
+struct pipe_sampler_view *
+nv50_create_sampler_view(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_sampler_view *);
+
+/* nv50_transfer.c */
+void
+nv50_m2mf_transfer_rect(struct nv50_context *,
+ const struct nv50_m2mf_rect *dst,
+ const struct nv50_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy);
+void
+nv50_sifc_linear_u8(struct nouveau_context *pipe,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, const void *data);
+void
+nv50_m2mf_copy_linear(struct nouveau_context *pipe,
+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+ unsigned size);
+void
+nv50_cb_push(struct nouveau_context *nv,
+ struct nouveau_bo *bo, unsigned domain,
+ unsigned base, unsigned size,
+ unsigned offset, unsigned words, const uint32_t *data);
+
+/* nv50_vbo.c */
+void nv50_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
+
+void *
+nv50_vertex_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements);
+void
+nv50_vertex_state_delete(struct pipe_context *pipe, void *hwcso);
+
+void nv50_vertex_arrays_validate(struct nv50_context *nv50);
+
+/* nv50_push.c */
+void nv50_push_vbo(struct nv50_context *, const struct pipe_draw_info *);
+
+/* nv84_video.c */
+struct pipe_video_codec *
+nv84_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ);
+
+struct pipe_video_buffer *
+nv84_video_buffer_create(struct pipe_context *pipe,
+ const struct pipe_video_buffer *template);
+
+int
+nv84_screen_get_video_param(struct pipe_screen *pscreen,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint,
+ enum pipe_video_cap param);
+
+boolean
+nv84_screen_video_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint);
+
+/* nv98_video.c */
+struct pipe_video_codec *
+nv98_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ);
+
+struct pipe_video_buffer *
+nv98_video_buffer_create(struct pipe_context *pipe,
+ const struct pipe_video_buffer *template);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_debug.h b/src/gallium/drivers/nouveau/nv50/nv50_debug.h
new file mode 100644
index 00000000000..f3dee621519
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_debug.h
@@ -0,0 +1,25 @@
+
+#ifndef __NV50_DEBUG_H__
+#define __NV50_DEBUG_H__
+
+#include <stdio.h>
+
+#include "util/u_debug.h"
+
+#define NV50_DEBUG_MISC 0x0001
+#define NV50_DEBUG_SHADER 0x0100
+#define NV50_DEBUG_PROG_IR 0x0200
+#define NV50_DEBUG_PROG_RA 0x0400
+#define NV50_DEBUG_PROG_CFLOW 0x0800
+#define NV50_DEBUG_PROG_ALL 0x1f00
+
+#define NV50_DEBUG 0
+
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args)
+
+#define NV50_DBGMSG(ch, args...) \
+ if ((NV50_DEBUG) & (NV50_DEBUG_##ch)) \
+ debug_printf(args)
+
+#endif /* __NV50_DEBUG_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h
new file mode 100644
index 00000000000..2e42843fa56
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h
@@ -0,0 +1,200 @@
+#ifndef NV50_DEFS_XML
+#define NV50_DEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/nv50_defs.xml ( 7783 bytes, from 2013-02-14 13:56:25)
+- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nvchipsets.xml ( 3704 bytes, from 2012-08-18 12:48:55)
+
+Copyright (C) 2006-2013 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Koƛcielnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_VSTATUS_IDLE 0x00000000
+#define NV50_VSTATUS_BUSY 0x00000001
+#define NV50_VSTATUS_UNK2 0x00000002
+#define NV50_VSTATUS_WAITING 0x00000003
+#define NV50_VSTATUS_BLOCKED 0x00000005
+#define NV50_VSTATUS_FAULTED 0x00000006
+#define NV50_VSTATUS_PAUSED 0x00000007
+#define NV50_SURFACE_FORMAT_BITMAP 0x0000001c
+#define NV50_SURFACE_FORMAT_UNK1D 0x0000001d
+#define NV50_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0
+#define NV50_SURFACE_FORMAT_RGBA32_SINT 0x000000c1
+#define NV50_SURFACE_FORMAT_RGBA32_UINT 0x000000c2
+#define NV50_SURFACE_FORMAT_RGBX32_FLOAT 0x000000c3
+#define NV50_SURFACE_FORMAT_RGBX32_SINT 0x000000c4
+#define NV50_SURFACE_FORMAT_RGBX32_UINT 0x000000c5
+#define NV50_SURFACE_FORMAT_RGBA16_UNORM 0x000000c6
+#define NV50_SURFACE_FORMAT_RGBA16_SNORM 0x000000c7
+#define NV50_SURFACE_FORMAT_RGBA16_SINT 0x000000c8
+#define NV50_SURFACE_FORMAT_RGBA16_UINT 0x000000c9
+#define NV50_SURFACE_FORMAT_RGBA16_FLOAT 0x000000ca
+#define NV50_SURFACE_FORMAT_RG32_FLOAT 0x000000cb
+#define NV50_SURFACE_FORMAT_RG32_SINT 0x000000cc
+#define NV50_SURFACE_FORMAT_RG32_UINT 0x000000cd
+#define NV50_SURFACE_FORMAT_RGBX16_FLOAT 0x000000ce
+#define NV50_SURFACE_FORMAT_BGRA8_UNORM 0x000000cf
+#define NV50_SURFACE_FORMAT_BGRA8_SRGB 0x000000d0
+#define NV50_SURFACE_FORMAT_RGB10_A2_UNORM 0x000000d1
+#define NV50_SURFACE_FORMAT_RGB10_A2_UINT 0x000000d2
+#define NV50_SURFACE_FORMAT_RGBA8_UNORM 0x000000d5
+#define NV50_SURFACE_FORMAT_RGBA8_SRGB 0x000000d6
+#define NV50_SURFACE_FORMAT_RGBA8_SNORM 0x000000d7
+#define NV50_SURFACE_FORMAT_RGBA8_SINT 0x000000d8
+#define NV50_SURFACE_FORMAT_RGBA8_UINT 0x000000d9
+#define NV50_SURFACE_FORMAT_RG16_UNORM 0x000000da
+#define NV50_SURFACE_FORMAT_RG16_SNORM 0x000000db
+#define NV50_SURFACE_FORMAT_RG16_SINT 0x000000dc
+#define NV50_SURFACE_FORMAT_RG16_UINT 0x000000dd
+#define NV50_SURFACE_FORMAT_RG16_FLOAT 0x000000de
+#define NV50_SURFACE_FORMAT_BGR10_A2_UNORM 0x000000df
+#define NV50_SURFACE_FORMAT_R11G11B10_FLOAT 0x000000e0
+#define NV50_SURFACE_FORMAT_R32_SINT 0x000000e3
+#define NV50_SURFACE_FORMAT_R32_UINT 0x000000e4
+#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5
+#define NV50_SURFACE_FORMAT_BGRX8_UNORM 0x000000e6
+#define NV50_SURFACE_FORMAT_BGRX8_SRGB 0x000000e7
+#define NV50_SURFACE_FORMAT_B5G6R5_UNORM 0x000000e8
+#define NV50_SURFACE_FORMAT_BGR5_A1_UNORM 0x000000e9
+#define NV50_SURFACE_FORMAT_RG8_UNORM 0x000000ea
+#define NV50_SURFACE_FORMAT_RG8_SNORM 0x000000eb
+#define NV50_SURFACE_FORMAT_RG8_SINT 0x000000ec
+#define NV50_SURFACE_FORMAT_RG8_UINT 0x000000ed
+#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee
+#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef
+#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0
+#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1
+#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2
+#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3
+#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4
+#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5
+#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6
+#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7
+#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM 0x000000f8
+#define NV50_SURFACE_FORMAT_RGBX8_UNORM 0x000000f9
+#define NV50_SURFACE_FORMAT_RGBX8_SRGB 0x000000fa
+#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFB 0x000000fb
+#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFC 0x000000fc
+#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFD 0x000000fd
+#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFE 0x000000fe
+#define NV50_SURFACE_FORMAT_Y32_UINT_UNKFF 0x000000ff
+#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a
+#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013
+#define NV50_ZETA_FORMAT_S8_Z24_UNORM 0x00000014
+#define NV50_ZETA_FORMAT_Z24_X8_UNORM 0x00000015
+#define NV50_ZETA_FORMAT_Z24_S8_UNORM 0x00000016
+#define NV50_ZETA_FORMAT_Z24_C8_UNORM 0x00000018
+#define NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT 0x00000019
+#define NV50_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d
+#define NV50_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e
+#define NV50_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f
+#define NVE4_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002
+#define NVE4_IMAGE_FORMAT_RGBA32_SINT 0x00000003
+#define NVE4_IMAGE_FORMAT_RGBA32_UINT 0x00000004
+#define NVE4_IMAGE_FORMAT_RGBA16_UNORM 0x00000008
+#define NVE4_IMAGE_FORMAT_RGBA16_SNORM 0x00000009
+#define NVE4_IMAGE_FORMAT_RGBA16_SINT 0x0000000a
+#define NVE4_IMAGE_FORMAT_RGBA16_UINT 0x0000000b
+#define NVE4_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c
+#define NVE4_IMAGE_FORMAT_RG32_FLOAT 0x0000000d
+#define NVE4_IMAGE_FORMAT_RG32_SINT 0x0000000e
+#define NVE4_IMAGE_FORMAT_RG32_UINT 0x0000000f
+#define NVE4_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013
+#define NVE4_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015
+#define NVE4_IMAGE_FORMAT_RGBA8_UNORM 0x00000018
+#define NVE4_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a
+#define NVE4_IMAGE_FORMAT_RGBA8_SINT 0x0000001b
+#define NVE4_IMAGE_FORMAT_RGBA8_UINT 0x0000001c
+#define NVE4_IMAGE_FORMAT_RG16_UNORM 0x0000001d
+#define NVE4_IMAGE_FORMAT_RG16_SNORM 0x0000001e
+#define NVE4_IMAGE_FORMAT_RG16_SINT 0x0000001f
+#define NVE4_IMAGE_FORMAT_RG16_UINT 0x00000020
+#define NVE4_IMAGE_FORMAT_RG16_FLOAT 0x00000021
+#define NVE4_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024
+#define NVE4_IMAGE_FORMAT_R32_SINT 0x00000027
+#define NVE4_IMAGE_FORMAT_R32_UINT 0x00000028
+#define NVE4_IMAGE_FORMAT_R32_FLOAT 0x00000029
+#define NVE4_IMAGE_FORMAT_RG8_UNORM 0x0000002e
+#define NVE4_IMAGE_FORMAT_RG8_SNORM 0x0000002f
+#define NVE4_IMAGE_FORMAT_RG8_SINT 0x00000030
+#define NVE4_IMAGE_FORMAT_RG8_UINT 0x00000031
+#define NVE4_IMAGE_FORMAT_R16_UNORM 0x00000032
+#define NVE4_IMAGE_FORMAT_R16_SNORM 0x00000033
+#define NVE4_IMAGE_FORMAT_R16_SINT 0x00000034
+#define NVE4_IMAGE_FORMAT_R16_UINT 0x00000035
+#define NVE4_IMAGE_FORMAT_R16_FLOAT 0x00000036
+#define NVE4_IMAGE_FORMAT_R8_UNORM 0x00000037
+#define NVE4_IMAGE_FORMAT_R8_SNORM 0x00000038
+#define NVE4_IMAGE_FORMAT_R8_SINT 0x00000039
+#define NVE4_IMAGE_FORMAT_R8_UINT 0x0000003a
+#define NV50_QUERY__SIZE 0x00000010
+#define NV50_QUERY_COUNTER 0x00000000
+
+#define NV50_QUERY_RES 0x00000004
+
+#define NV50_QUERY_TIME 0x00000008
+
+
+#endif /* NV50_DEFS_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_draw.c b/src/gallium/drivers/nouveau/nv50/nv50_draw.c
new file mode 100644
index 00000000000..fa68cd8ee6a
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_draw.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "draw/draw_pipe.h"
+
+#include "nv50/nv50_context.h"
+
+struct nv50_render_stage {
+ struct draw_stage stage;
+ struct nv50_context *nv50;
+};
+
+static INLINE struct nv50_render_stage *
+nv50_render_stage(struct draw_stage *stage)
+{
+ return (struct nv50_render_stage *)stage;
+}
+
+static void
+nv50_render_point(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_line(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_tri(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_flush(struct draw_stage *stage, unsigned flags)
+{
+}
+
+static void
+nv50_render_reset_stipple_counter(struct draw_stage *stage)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_destroy(struct draw_stage *stage)
+{
+ FREE(stage);
+}
+
+struct draw_stage *
+nv50_draw_render_stage(struct nv50_context *nv50)
+{
+ struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage);
+
+ rs->nv50 = nv50;
+ rs->stage.draw = nv50->draw;
+ rs->stage.destroy = nv50_render_destroy;
+ rs->stage.point = nv50_render_point;
+ rs->stage.line = nv50_render_line;
+ rs->stage.tri = nv50_render_tri;
+ rs->stage.flush = nv50_render_flush;
+ rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter;
+
+ return &rs->stage;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
new file mode 100644
index 00000000000..0a7e812ba13
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#if NOUVEAU_DRIVER == 0xc0
+# include "nvc0/nvc0_screen.h"
+# include "nvc0/nvc0_3d.xml.h"
+#else
+# include "nv50/nv50_screen.h"
+# include "nv50/nv50_3d.xml.h"
+#endif
+#include "nv50/nv50_texture.xml.h"
+#include "nv50/nv50_defs.xml.h"
+
+#include "pipe/p_defines.h"
+
+/* Abbreviated usage masks:
+ * T: texturing
+ * R: render target
+ * B: render target, blendable
+ * C: render target (color), blendable only on nvc0
+ * D: scanout/display target, blendable
+ * Z: depth/stencil
+ * V: vertex fetch
+ * I: image / surface, implies T
+ */
+#define U_V PIPE_BIND_VERTEX_BUFFER
+#define U_T PIPE_BIND_SAMPLER_VIEW
+#define U_I PIPE_BIND_SHADER_RESOURCE | PIPE_BIND_COMPUTE_RESOURCE
+#define U_TR PIPE_BIND_RENDER_TARGET | U_T
+#define U_IR U_TR | U_I
+#define U_TB PIPE_BIND_BLENDABLE | U_TR
+#define U_IB PIPE_BIND_BLENDABLE | U_IR
+#define U_TD PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | U_TB
+#define U_TZ PIPE_BIND_DEPTH_STENCIL | U_T
+#define U_TV U_V | U_T
+#define U_TRV U_V | U_TR
+#define U_IRV U_V | U_IR
+#define U_TBV U_V | U_TB
+#define U_IBV U_V | U_IB
+#define U_TDV U_V | U_TD
+#if NOUVEAU_DRIVER == 0xc0
+# define U_TC U_TB
+# define U_IC U_IB
+# define U_TCV U_TBV
+# define U_ICV U_IBV
+# define U_tV U_TV
+#else
+# define U_TC U_TR
+# define U_IC U_IR
+# define U_TCV U_TRV
+# define U_ICV U_IRV
+# define U_tV U_V
+#endif
+
+#define NV50_SURFACE_FORMAT_NONE 0
+#define NV50_ZETA_FORMAT_NONE 0
+
+/* for vertex buffers: */
+#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8
+#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16
+#define NV50_TIC_0_FMT_32_32_32 NVC0_TIC_0_FMT_32_32_32
+
+#if NOUVEAU_DRIVER == 0xc0
+# define NVXX_3D_VAF_SIZE(s) NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##s
+# define NVXX_3D_VAF_TYPE(t) NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_##t
+#else
+# define NVXX_3D_VAF_SIZE(s) NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_##s
+# define NVXX_3D_VAF_TYPE(t) NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_##t
+#endif
+
+#define TBLENT_A_(pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u, br) \
+ [PIPE_FORMAT_##pf] = { \
+ sf, \
+ (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \
+ (NV50_TIC_MAP_##g << NV50_TIC_0_MAPG__SHIFT) | \
+ (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \
+ (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \
+ (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
+ (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
+ (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
+ (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
+ NV50_TIC_0_FMT_##sz, \
+ NVXX_3D_VAF_SIZE(sz) | \
+ NVXX_3D_VAF_TYPE(t0) | (br << 31), \
+ U_##u \
+ }
+
+#define TBLENT_B_(pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \
+ [PIPE_FORMAT_##pf] = { \
+ sf, \
+ (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \
+ (NV50_TIC_MAP_##g << NV50_TIC_0_MAPG__SHIFT) | \
+ (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \
+ (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \
+ (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \
+ (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \
+ (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \
+ (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \
+ NV50_TIC_0_FMT_##sz, 0, U_##u \
+ }
+
+#define C4A(p, n, r, g, b, a, t, s, u, br) \
+ TBLENT_A_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u, br)
+#define C4B(p, n, r, g, b, a, t, s, u) \
+ TBLENT_B_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u)
+
+#define ZXB(p, n, r, g, b, a, t, s, u) \
+ TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \
+ r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u)
+#define ZSB(p, n, r, g, b, a, t, s, u) \
+ TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \
+ r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u)
+#define SZB(p, n, r, g, b, a, t, s, u) \
+ TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \
+ r, g, b, ONE_FLOAT, UINT, t, UINT, UINT, s, u)
+
+#define F3A(p, n, r, g, b, a, t, s, u) \
+ C4A(p, n, r, g, b, ONE_FLOAT, t, s, u, 0)
+#define I3A(p, n, r, g, b, a, t, s, u) \
+ C4A(p, n, r, g, b, ONE_INT, t, s, u, 0)
+#define F3B(p, n, r, g, b, a, t, s, u) \
+ C4B(p, n, r, g, b, ONE_FLOAT, t, s, u)
+#define I3B(p, n, r, g, b, a, t, s, u) \
+ C4B(p, n, r, g, b, ONE_INT, t, s, u)
+
+#define F2A(p, n, r, g, b, a, t, s, u) \
+ C4A(p, n, r, g, ZERO, ONE_FLOAT, t, s, u, 0)
+#define I2A(p, n, r, g, b, a, t, s, u) \
+ C4A(p, n, r, g, ZERO, ONE_INT, t, s, u, 0)
+#define F2B(p, n, r, g, b, a, t, s, u) \
+ C4B(p, n, r, g, ZERO, ONE_FLOAT, t, s, u)
+#define I2B(p, n, r, g, b, a, t, s, u) \
+ C4B(p, n, r, g, ZERO, ONE_INT, t, s, u)
+
+#define F1A(p, n, r, g, b, a, t, s, u) \
+ C4A(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u, 0)
+#define I1A(p, n, r, g, b, a, t, s, u) \
+ C4A(p, n, r, ZERO, ZERO, ONE_INT, t, s, u, 0)
+#define F1B(p, n, r, g, b, a, t, s, u) \
+ C4B(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u)
+#define I1B(p, n, r, g, b, a, t, s, u) \
+ C4B(p, n, r, ZERO, ZERO, ONE_INT, t, s, u)
+
+#define A1B(p, n, r, g, b, a, t, s, u) \
+ C4B(p, n, ZERO, ZERO, ZERO, a, t, s, u)
+
+#if NOUVEAU_DRIVER == 0xc0
+const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] =
+#else
+const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
+#endif
+{
+ C4A(B8G8R8A8_UNORM, BGRA8_UNORM, C2, C1, C0, C3, UNORM, 8_8_8_8, TDV, 1),
+ F3A(B8G8R8X8_UNORM, BGRX8_UNORM, C2, C1, C0, xx, UNORM, 8_8_8_8, TD),
+ C4A(B8G8R8A8_SRGB, BGRA8_SRGB, C2, C1, C0, C3, UNORM, 8_8_8_8, TD, 1),
+ F3A(B8G8R8X8_SRGB, BGRX8_SRGB, C2, C1, C0, xx, UNORM, 8_8_8_8, TD),
+ C4A(R8G8B8A8_UNORM, RGBA8_UNORM, C0, C1, C2, C3, UNORM, 8_8_8_8, IBV, 0),
+ F3A(R8G8B8X8_UNORM, RGBX8_UNORM, C0, C1, C2, xx, UNORM, 8_8_8_8, TB),
+ C4A(R8G8B8A8_SRGB, RGBA8_SRGB, C0, C1, C2, C3, UNORM, 8_8_8_8, TB, 0),
+ F3B(R8G8B8X8_SRGB, RGBX8_SRGB, C0, C1, C2, xx, UNORM, 8_8_8_8, TB),
+
+ ZXB(Z16_UNORM, Z16_UNORM, C0, C0, C0, xx, UNORM, Z16, TZ),
+ ZXB(Z32_FLOAT, Z32_FLOAT, C0, C0, C0, xx, FLOAT, Z32, TZ),
+ ZXB(Z24X8_UNORM, Z24_X8_UNORM, C0, C0, C0, xx, UNORM, Z24_X8, TZ),
+ ZSB(Z24_UNORM_S8_UINT, Z24_S8_UNORM, C0, C0, C0, xx, UNORM, Z24_S8, TZ),
+ ZSB(X24S8_UINT, NONE, C1, C1, C1, xx, UNORM, Z24_S8, T),
+ SZB(S8_UINT_Z24_UNORM, S8_Z24_UNORM, C1, C1, C1, xx, UNORM, S8_Z24, TZ),
+ SZB(S8X24_UINT, NONE, C0, C0, C0, xx, UNORM, S8_Z24, T),
+ ZSB(Z32_FLOAT_S8X24_UINT, Z32_S8_X24_FLOAT, C0, C0, C0, xx, FLOAT,
+ Z32_S8_X24, TZ),
+ ZSB(X32_S8X24_UINT, NONE, C1, C1, C1, xx, FLOAT, Z32_S8_X24, T),
+
+ F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, T),
+ C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TB),
+ F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TB),
+ C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
+ F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
+ F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),
+
+ C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
+ IBV, 0),
+ C4A(B10G10R10A2_UNORM, BGR10_A2_UNORM, C2, C1, C0, C3, UNORM, 10_10_10_2,
+ TBV, 1),
+ C4A(R10G10B10A2_SNORM, NONE, C0, C1, C2, C3, SNORM, 10_10_10_2, TV, 0),
+ C4A(B10G10R10A2_SNORM, NONE, C2, C1, C0, C3, SNORM, 10_10_10_2, TV, 1),
+
+ F3B(R11G11B10_FLOAT, R11G11B10_FLOAT, C0, C1, C2, xx, FLOAT, 11_11_10, IB),
+
+ F3B(L8_UNORM, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB),
+ F3B(L8_SRGB, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB),
+ F3B(L8_SNORM, R8_SNORM, C0, C0, C0, xx, SNORM, 8, TC),
+ I3B(L8_SINT, R8_SINT, C0, C0, C0, xx, SINT, 8, TR),
+ I3B(L8_UINT, R8_UINT, C0, C0, C0, xx, UINT, 8, TR),
+ F3B(L16_UNORM, R16_UNORM, C0, C0, C0, xx, UNORM, 16, TC),
+ F3B(L16_SNORM, R16_SNORM, C0, C0, C0, xx, SNORM, 16, TC),
+ F3B(L16_FLOAT, R16_FLOAT, C0, C0, C0, xx, FLOAT, 16, TB),
+ I3B(L16_SINT, R16_SINT, C0, C0, C0, xx, SINT, 16, TR),
+ I3B(L16_UINT, R16_UINT, C0, C0, C0, xx, UINT, 16, TR),
+ F3B(L32_FLOAT, R32_FLOAT, C0, C0, C0, xx, FLOAT, 32, TB),
+ I3B(L32_SINT, R32_SINT, C0, C0, C0, xx, SINT, 32, TR),
+ I3B(L32_UINT, R32_UINT, C0, C0, C0, xx, UINT, 32, TR),
+
+ C4B(I8_UNORM, R8_UNORM, C0, C0, C0, C0, UNORM, 8, TR),
+ C4B(I8_SNORM, R8_SNORM, C0, C0, C0, C0, SNORM, 8, TR),
+ C4B(I8_SINT, R8_SINT, C0, C0, C0, C0, SINT, 8, TR),
+ C4B(I8_UINT, R8_UINT, C0, C0, C0, C0, UINT, 8, TR),
+ C4B(I16_UNORM, R16_UNORM, C0, C0, C0, C0, UNORM, 16, TR),
+ C4B(I16_SNORM, R16_SNORM, C0, C0, C0, C0, SNORM, 16, TR),
+ C4B(I16_FLOAT, R16_FLOAT, C0, C0, C0, C0, FLOAT, 16, TR),
+ C4B(I16_SINT, R16_SINT, C0, C0, C0, C0, SINT, 16, TR),
+ C4B(I16_UINT, R16_UINT, C0, C0, C0, C0, UINT, 16, TR),
+ C4B(I32_FLOAT, R32_FLOAT, C0, C0, C0, C0, FLOAT, 32, TR),
+ C4B(I32_SINT, R32_SINT, C0, C0, C0, C0, SINT, 32, TR),
+ C4B(I32_UINT, R32_UINT, C0, C0, C0, C0, UINT, 32, TR),
+
+ A1B(A8_UNORM, A8_UNORM, xx, xx, xx, C0, UNORM, 8, TB),
+ A1B(A8_SNORM, R8_SNORM, xx, xx, xx, C0, SNORM, 8, T),
+ A1B(A8_SINT, R8_SINT, xx, xx, xx, C0, SINT, 8, T),
+ A1B(A8_UINT, R8_UINT, xx, xx, xx, C0, UINT, 8, T),
+ A1B(A16_UNORM, R16_UNORM, xx, xx, xx, C0, UNORM, 16, T),
+ A1B(A16_SNORM, R16_SNORM, xx, xx, xx, C0, SNORM, 16, T),
+ A1B(A16_FLOAT, R16_FLOAT, xx, xx, xx, C0, FLOAT, 16, T),
+ A1B(A16_SINT, R16_SINT, xx, xx, xx, C0, SINT, 16, T),
+ A1B(A16_UINT, R16_UINT, xx, xx, xx, C0, UINT, 16, T),
+ A1B(A32_FLOAT, R32_FLOAT, xx, xx, xx, C0, FLOAT, 32, T),
+ A1B(A32_SINT, R32_SINT, xx, xx, xx, C0, SINT, 32, T),
+ A1B(A32_UINT, R32_UINT, xx, xx, xx, C0, UINT, 32, T),
+
+ C4B(L4A4_UNORM, NONE, C0, C0, C0, C1, UNORM, 4_4, T),
+ C4B(L8A8_UNORM, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T),
+ C4B(L8A8_SNORM, RG8_SNORM, C0, C0, C0, C1, SNORM, 8_8, T),
+ C4B(L8A8_SRGB, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T),
+ C4B(L8A8_SINT, RG8_SINT, C0, C0, C0, C1, SINT, 8_8, T),
+ C4B(L8A8_UINT, RG8_UINT, C0, C0, C0, C1, UINT, 8_8, T),
+ C4B(L16A16_UNORM, RG16_UNORM, C0, C0, C0, C1, UNORM, 16_16, T),
+ C4B(L16A16_SNORM, RG16_SNORM, C0, C0, C0, C1, SNORM, 16_16, T),
+ C4B(L16A16_FLOAT, RG16_FLOAT, C0, C0, C0, C1, FLOAT, 16_16, T),
+ C4B(L16A16_SINT, RG16_SINT, C0, C0, C0, C1, SINT, 16_16, T),
+ C4B(L16A16_UINT, RG16_UINT, C0, C0, C0, C1, UINT, 16_16, T),
+ C4B(L32A32_FLOAT, RG32_FLOAT, C0, C0, C0, C1, FLOAT, 32_32, T),
+ C4B(L32A32_SINT, RG32_SINT, C0, C0, C0, C1, SINT, 32_32, T),
+ C4B(L32A32_UINT, RG32_UINT, C0, C0, C0, C1, UINT, 32_32, T),
+
+ F3B(DXT1_RGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T),
+ F3B(DXT1_SRGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T),
+ C4B(DXT1_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT1, T),
+ C4B(DXT1_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT1, T),
+ C4B(DXT3_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T),
+ C4B(DXT3_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T),
+ C4B(DXT5_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T),
+ C4B(DXT5_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T),
+
+ F1B(RGTC1_UNORM, NONE, C0, xx, xx, xx, UNORM, RGTC1, T),
+ F1B(RGTC1_SNORM, NONE, C0, xx, xx, xx, SNORM, RGTC1, T),
+ F2B(RGTC2_UNORM, NONE, C0, C1, xx, xx, UNORM, RGTC2, T),
+ F2B(RGTC2_SNORM, NONE, C0, C1, xx, xx, SNORM, RGTC2, T),
+ F3B(LATC1_UNORM, NONE, C0, C0, C0, xx, UNORM, RGTC1, T),
+ F3B(LATC1_SNORM, NONE, C0, C0, C0, xx, SNORM, RGTC1, T),
+ C4B(LATC2_UNORM, NONE, C0, C0, C0, C1, UNORM, RGTC2, T),
+ C4B(LATC2_SNORM, NONE, C0, C0, C0, C1, SNORM, RGTC2, T),
+
+ C4A(R32G32B32A32_FLOAT, RGBA32_FLOAT, C0, C1, C2, C3, FLOAT, 32_32_32_32,
+ IBV, 0),
+ C4A(R32G32B32A32_UNORM, NONE, C0, C1, C2, C3, UNORM, 32_32_32_32, TV, 0),
+ C4A(R32G32B32A32_SNORM, NONE, C0, C1, C2, C3, SNORM, 32_32_32_32, TV, 0),
+ C4A(R32G32B32A32_SINT, RGBA32_SINT, C0, C1, C2, C3, SINT, 32_32_32_32,
+ IRV, 0),
+ C4A(R32G32B32A32_UINT, RGBA32_UINT, C0, C1, C2, C3, UINT, 32_32_32_32,
+ IRV, 0),
+ F3B(R32G32B32X32_FLOAT, RGBX32_FLOAT, C0, C1, C2, xx, FLOAT, 32_32_32_32, TB),
+ I3B(R32G32B32X32_SINT, RGBX32_SINT, C0, C1, C2, xx, SINT, 32_32_32_32, TR),
+ I3B(R32G32B32X32_UINT, RGBX32_UINT, C0, C1, C2, xx, UINT, 32_32_32_32, TR),
+
+ F2A(R32G32_FLOAT, RG32_FLOAT, C0, C1, xx, xx, FLOAT, 32_32, IBV),
+ F2A(R32G32_UNORM, NONE, C0, C1, xx, xx, UNORM, 32_32, TV),
+ F2A(R32G32_SNORM, NONE, C0, C1, xx, xx, SNORM, 32_32, TV),
+ I2A(R32G32_SINT, RG32_SINT, C0, C1, xx, xx, SINT, 32_32, IRV),
+ I2A(R32G32_UINT, RG32_UINT, C0, C1, xx, xx, UINT, 32_32, IRV),
+
+ F1A(R32_FLOAT, R32_FLOAT, C0, xx, xx, xx, FLOAT, 32, IBV),
+ F1A(R32_UNORM, NONE, C0, xx, xx, xx, UNORM, 32, TV),
+ F1A(R32_SNORM, NONE, C0, xx, xx, xx, SNORM, 32, TV),
+ I1A(R32_SINT, R32_SINT, C0, xx, xx, xx, SINT, 32, IRV),
+ I1A(R32_UINT, R32_UINT, C0, xx, xx, xx, UINT, 32, IRV),
+
+ C4A(R16G16B16A16_FLOAT, RGBA16_FLOAT, C0, C1, C2, C3, FLOAT, 16_16_16_16,
+ IBV, 0),
+ C4A(R16G16B16A16_UNORM, RGBA16_UNORM, C0, C1, C2, C3, UNORM, 16_16_16_16,
+ ICV, 0),
+ C4A(R16G16B16A16_SNORM, RGBA16_SNORM, C0, C1, C2, C3, SNORM, 16_16_16_16,
+ ICV, 0),
+ C4A(R16G16B16A16_SINT, RGBA16_SINT, C0, C1, C2, C3, SINT, 16_16_16_16,
+ IRV, 0),
+ C4A(R16G16B16A16_UINT, RGBA16_UINT, C0, C1, C2, C3, UINT, 16_16_16_16,
+ IRV, 0),
+ F3B(R16G16B16X16_FLOAT, RGBX16_FLOAT, C0, C1, C2, xx, FLOAT, 16_16_16_16, TB),
+ F3B(R16G16B16X16_UNORM, RGBA16_UNORM, C0, C1, C2, xx, UNORM, 16_16_16_16, T),
+ F3B(R16G16B16X16_SNORM, RGBA16_SNORM, C0, C1, C2, xx, SNORM, 16_16_16_16, T),
+ I3B(R16G16B16X16_SINT, RGBA16_SINT, C0, C1, C2, xx, SINT, 16_16_16_16, T),
+ I3B(R16G16B16X16_UINT, RGBA16_UINT, C0, C1, C2, xx, UINT, 16_16_16_16, T),
+
+ F2A(R16G16_FLOAT, RG16_FLOAT, C0, C1, xx, xx, FLOAT, 16_16, IBV),
+ F2A(R16G16_UNORM, RG16_UNORM, C0, C1, xx, xx, UNORM, 16_16, ICV),
+ F2A(R16G16_SNORM, RG16_SNORM, C0, C1, xx, xx, SNORM, 16_16, ICV),
+ I2A(R16G16_SINT, RG16_SINT, C0, C1, xx, xx, SINT, 16_16, IRV),
+ I2A(R16G16_UINT, RG16_UINT, C0, C1, xx, xx, UINT, 16_16, IRV),
+
+ F1A(R16_FLOAT, R16_FLOAT, C0, xx, xx, xx, FLOAT, 16, IBV),
+ F1A(R16_UNORM, R16_UNORM, C0, xx, xx, xx, UNORM, 16, ICV),
+ F1A(R16_SNORM, R16_SNORM, C0, xx, xx, xx, SNORM, 16, ICV),
+ I1A(R16_SINT, R16_SINT, C0, xx, xx, xx, SINT, 16, IRV),
+ I1A(R16_UINT, R16_UINT, C0, xx, xx, xx, UINT, 16, IRV),
+
+ C4A(R8G8B8A8_SNORM, RGBA8_SNORM, C0, C1, C2, C3, SNORM, 8_8_8_8, ICV, 0),
+ C4A(R8G8B8A8_SINT, RGBA8_SINT, C0, C1, C2, C3, SINT, 8_8_8_8, IRV, 0),
+ C4A(R8G8B8A8_UINT, RGBA8_UINT, C0, C1, C2, C3, UINT, 8_8_8_8, IRV, 0),
+ F3B(R8G8B8X8_SNORM, RGBA8_SNORM, C0, C1, C2, xx, SNORM, 8_8_8_8, T),
+ I3B(R8G8B8X8_SINT, RGBA8_SINT, C0, C1, C2, xx, SINT, 8_8_8_8, T),
+ I3B(R8G8B8X8_UINT, RGBA8_UINT, C0, C1, C2, xx, UINT, 8_8_8_8, T),
+
+ F2A(R8G8_UNORM, RG8_UNORM, C0, C1, xx, xx, UNORM, 8_8, IBV),
+ F2A(R8G8_SNORM, RG8_SNORM, C0, C1, xx, xx, SNORM, 8_8, ICV),
+ I2A(R8G8_SINT, RG8_SINT, C0, C1, xx, xx, SINT, 8_8, IRV),
+ I2A(R8G8_UINT, RG8_UINT, C0, C1, xx, xx, UINT, 8_8, IRV),
+
+ F1A(R8_UNORM, R8_UNORM, C0, xx, xx, xx, UNORM, 8, IBV),
+ F1A(R8_SNORM, R8_SNORM, C0, xx, xx, xx, SNORM, 8, ICV),
+ I1A(R8_SINT, R8_SINT, C0, xx, xx, xx, SINT, 8, IRV),
+ I1A(R8_UINT, R8_UINT, C0, xx, xx, xx, UINT, 8, IRV),
+
+ F3B(R8G8_B8G8_UNORM, NONE, C0, C1, C2, xx, UNORM, U8_YA8_V8_YB8, T),
+ F3B(G8R8_B8R8_UNORM, NONE, C1, C0, C2, xx, UNORM, U8_YA8_V8_YB8, T),
+ F3B(G8R8_G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, YA8_U8_YB8_V8, T),
+ F3B(R8G8_R8B8_UNORM, NONE, C1, C0, C2, xx, UNORM, YA8_U8_YB8_V8, T),
+
+ F1B(R1_UNORM, BITMAP, C0, xx, xx, xx, UNORM, BITMAP, T),
+
+ C4B(R4A4_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 4_4, T),
+ C4B(R8A8_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 8_8, T),
+ C4B(A4R4_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 4_4, T),
+ C4B(A8R8_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 8_8, T),
+
+ TBLENT_B_(R8SG8SB8UX8U_NORM, 0,
+ C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 8_8_8_8, T),
+ TBLENT_B_(R5SG5SB6U_NORM, 0,
+ C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 5_5_6, T),
+
+ /* vertex-only formats: */
+
+ C4A(R32G32B32A32_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 32_32_32_32, V, 0),
+ C4A(R32G32B32A32_USCALED, NONE, C0, C1, C2, C3, USCALED, 32_32_32_32, V, 0),
+ F3A(R32G32B32_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, tV),
+ F3A(R32G32B32_UNORM, NONE, C0, C1, C2, xx, UNORM, 32_32_32, V),
+ F3A(R32G32B32_SNORM, NONE, C0, C1, C2, xx, SNORM, 32_32_32, V),
+ I3A(R32G32B32_SINT, NONE, C0, C1, C2, xx, SINT, 32_32_32, tV),
+ I3A(R32G32B32_UINT, NONE, C0, C1, C2, xx, UINT, 32_32_32, tV),
+ F3A(R32G32B32_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 32_32_32, V),
+ F3A(R32G32B32_USCALED, NONE, C0, C1, C2, xx, USCALED, 32_32_32, V),
+ F2A(R32G32_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 32_32, V),
+ F2A(R32G32_USCALED, NONE, C0, C1, xx, xx, USCALED, 32_32, V),
+ F1A(R32_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 32, V),
+ F1A(R32_USCALED, NONE, C0, xx, xx, xx, USCALED, 32, V),
+
+ C4A(R16G16B16A16_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 16_16_16_16, V, 0),
+ C4A(R16G16B16A16_USCALED, NONE, C0, C1, C2, C3, USCALED, 16_16_16_16, V, 0),
+ F3A(R16G16B16_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 16_16_16, V),
+ F3A(R16G16B16_UNORM, NONE, C0, C1, C2, xx, UNORM, 16_16_16, V),
+ F3A(R16G16B16_SNORM, NONE, C0, C1, C2, xx, SNORM, 16_16_16, V),
+ I3A(R16G16B16_SINT, NONE, C0, C1, C2, xx, SINT, 16_16_16, V),
+ I3A(R16G16B16_UINT, NONE, C0, C1, C2, xx, UINT, 16_16_16, V),
+ F3A(R16G16B16_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 16_16_16, V),
+ F3A(R16G16B16_USCALED, NONE, C0, C1, C2, xx, USCALED, 16_16_16, V),
+ F2A(R16G16_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 16_16, V),
+ F2A(R16G16_USCALED, NONE, C0, C1, xx, xx, USCALED, 16_16, V),
+ F1A(R16_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 16, V),
+ F1A(R16_USCALED, NONE, C0, xx, xx, xx, USCALED, 16, V),
+
+ C4A(R8G8B8A8_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 8_8_8_8, V, 0),
+ C4A(R8G8B8A8_USCALED, NONE, C0, C1, C2, C3, USCALED, 8_8_8_8, V, 0),
+ F3A(R8G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, 8_8_8, V),
+ F3A(R8G8B8_SNORM, NONE, C0, C1, C2, xx, SNORM, 8_8_8, V),
+ I2A(R8G8B8_SINT, NONE, C0, C1, C2, xx, SINT, 8_8_8, V),
+ I2A(R8G8B8_UINT, NONE, C0, C1, C2, xx, UINT, 8_8_8, V),
+ F3A(R8G8B8_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 8_8_8, V),
+ F3A(R8G8B8_USCALED, NONE, C0, C1, C2, xx, USCALED, 8_8_8, V),
+ F2A(R8G8_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 8_8, V),
+ F2A(R8G8_USCALED, NONE, C0, C1, xx, xx, USCALED, 8_8, V),
+ F1A(R8_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 8, V),
+ F1A(R8_USCALED, NONE, C0, xx, xx, xx, USCALED, 8, V),
+
+ /* FIXED types: not supported natively, converted on VBO push */
+
+ C4B(R32G32B32A32_FIXED, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V),
+ F3B(R32G32B32_FIXED, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V),
+ F2B(R32G32_FIXED, NONE, C0, C1, xx, xx, FLOAT, 32_32, V),
+ F1B(R32_FIXED, NONE, C0, xx, xx, xx, FLOAT, 32, V),
+
+ C4B(R64G64B64A64_FLOAT, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V),
+ F3B(R64G64B64_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V),
+ F2B(R64G64_FLOAT, NONE, C0, C1, xx, xx, FLOAT, 32_32, V),
+ F1B(R64_FLOAT, NONE, C0, xx, xx, xx, FLOAT, 32, V),
+};
+
+#if 0
+const uint8_t nv50_rt_format_map[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_Z16_UNORM] = NV50_ZETA_FORMAT_Z16_UNORM,
+ [PIPE_FORMAT_Z24X8_UNORM] = NV50_ZETA_FORMAT_Z24_X8_UNORM,
+ [PIPE_FORMAT_Z24_UNORM_S8_UINT] = NV50_ZETA_FORMAT_Z24_S8_UNORM,
+ [PIPE_FORMAT_S8_UINT_Z24_UNORM] = NV50_ZETA_FORMAT_S8_Z24_UNORM,
+ [PIPE_FORMAT_Z32_FLOAT] = NV50_ZETA_FORMAT_Z32_FLOAT,
+ [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT,
+
+ [PIPE_FORMAT_R1_UNORM] = NV50_SURFACE_FORMAT_BITMAP,
+
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = NV50_SURFACE_FORMAT_RGBA32_FLOAT,
+ [PIPE_FORMAT_R32G32B32X32_FLOAT] = NV50_SURFACE_FORMAT_RGBX32_FLOAT,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = NV50_SURFACE_FORMAT_RGBA32_SINT,
+ [PIPE_FORMAT_R32G32B32X32_SINT] = NV50_SURFACE_FORMAT_RGBX32_SINT,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = NV50_SURFACE_FORMAT_RGBA32_UINT,
+ [PIPE_FORMAT_R32G32B32X32_UINT] = NV50_SURFACE_FORMAT_RGBX32_UINT,
+
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = NV50_SURFACE_FORMAT_RGBA16_FLOAT,
+ [PIPE_FORMAT_R16G16B16X16_FLOAT] = NV50_SURFACE_FORMAT_RGBX16_FLOAT,
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = NV50_SURFACE_FORMAT_RGBA16_UNORM,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = NV50_SURFACE_FORMAT_RGBA16_SNORM,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = NV50_SURFACE_FORMAT_RGBA16_SINT,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = NV50_SURFACE_FORMAT_RGBA16_UINT,
+
+ [PIPE_FORMAT_B8G8R8A8_UNORM] = NV50_SURFACE_FORMAT_BGRA8_UNORM,
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = NV50_SURFACE_FORMAT_RGBA8_UNORM,
+ [PIPE_FORMAT_B8G8R8X8_UNORM] = NV50_SURFACE_FORMAT_BGRX8_UNORM,
+ [PIPE_FORMAT_R8G8B8X8_UNORM] = NV50_SURFACE_FORMAT_RGBX8_UNORM,
+ [PIPE_FORMAT_B8G8R8A8_SRGB] = NV50_SURFACE_FORMAT_BGRA8_SRGB,
+ [PIPE_FORMAT_R8G8B8A8_SRGB] = NV50_SURFACE_FORMAT_RGBA8_SRGB,
+ [PIPE_FORMAT_B8G8R8X8_SRGB] = NV50_SURFACE_FORMAT_BGRX8_SRGB,
+ [PIPE_FORMAT_R8G8B8X8_SRGB] = NV50_SURFACE_FORMAT_RGBX8_SRGB,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = NV50_SURFACE_FORMAT_RGBA8_SNORM,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = NV50_SURFACE_FORMAT_RGBA8_SINT,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = NV50_SURFACE_FORMAT_RGBA8_UINT,
+
+ [PIPE_FORMAT_R11G11B10_FLOAT] = NV50_SURFACE_FORMAT_R11G11B10_FLOAT,
+
+ [PIPE_FORMAT_B10G10R10A2_UNORM] = NV50_SURFACE_FORMAT_BGR10_A2_UNORM,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = NV50_SURFACE_FORMAT_RGB10_A2_UNORM,
+ [PIPE_FORMAT_R10G10B10A2_UINT] = NV50_SURFACE_FORMAT_RGB10_A2_UINT,
+
+ [PIPE_FORMAT_B5G6R5_UNORM] = NV50_SURFACE_FORMAT_B5G6R5_UNORM,
+
+ [PIPE_FORMAT_B5G5R5A1_UNORM] = NV50_SURFACE_FORMAT_BGR5_A1_UNORM,
+ [PIPE_FORMAT_B5G5R5X1_UNORM] = NV50_SURFACE_FORMAT_BGR5_X1_UNORM,
+
+ [PIPE_FORMAT_R32G32_FLOAT] = NV50_SURFACE_FORMAT_RG32_FLOAT,
+ [PIPE_FORMAT_R32G32_SINT] = NV50_SURFACE_FORMAT_RG32_SINT,
+ [PIPE_FORMAT_R32G32_UINT] = NV50_SURFACE_FORMAT_RG32_UINT,
+
+ [PIPE_FORMAT_R16G16_FLOAT] = NV50_SURFACE_FORMAT_RG16_FLOAT,
+ [PIPE_FORMAT_R16G16_UNORM] = NV50_SURFACE_FORMAT_RG16_UNORM,
+ [PIPE_FORMAT_R16G16_SNORM] = NV50_SURFACE_FORMAT_RG16_SNORM,
+ [PIPE_FORMAT_R16G16_SINT] = NV50_SURFACE_FORMAT_RG16_SINT,
+ [PIPE_FORMAT_R16G16_UINT] = NV50_SURFACE_FORMAT_RG16_UINT,
+
+ [PIPE_FORMAT_R8G8_UNORM] = NV50_SURFACE_FORMAT_RG8_UNORM,
+ [PIPE_FORMAT_R8G8_SNORM] = NV50_SURFACE_FORMAT_RG8_SNORM,
+ [PIPE_FORMAT_R8G8_SINT] = NV50_SURFACE_FORMAT_RG8_SINT,
+ [PIPE_FORMAT_R8G8_UINT] = NV50_SURFACE_FORMAT_RG8_UINT,
+
+ [PIPE_FORMAT_R32_FLOAT] = NV50_SURFACE_FORMAT_R32_FLOAT,
+ [PIPE_FORMAT_R32_SINT] = NV50_SURFACE_FORMAT_R32_SINT,
+ [PIPE_FORMAT_R32_UINT] = NV50_SURFACE_FORMAT_R32_UINT,
+
+ [PIPE_FORMAT_R16_FLOAT] = NV50_SURFACE_FORMAT_R16_FLOAT,
+ [PIPE_FORMAT_R16_UNORM] = NV50_SURFACE_FORMAT_R16_UNORM,
+ [PIPE_FORMAT_R16_SNORM] = NV50_SURFACE_FORMAT_R16_SNORM,
+ [PIPE_FORMAT_R16_SINT] = NV50_SURFACE_FORMAT_R16_SINT,
+ [PIPE_FORMAT_R16_UINT] = NV50_SURFACE_FORMAT_R16_UINT,
+
+ [PIPE_FORMAT_R8_UNORM] = NV50_SURFACE_FORMAT_R8_UNORM,
+ [PIPE_FORMAT_R8_SNORM] = NV50_SURFACE_FORMAT_R8_SNORM,
+ [PIPE_FORMAT_R8_SINT] = NV50_SURFACE_FORMAT_R8_SINT,
+ [PIPE_FORMAT_R8_UINT] = NV50_SURFACE_FORMAT_R8_UINT,
+
+ [PIPE_FORMAT_A8_UNORM] = NV50_SURFACE_FORMAT_A8_UNORM
+};
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
new file mode 100644
index 00000000000..513d8f96aac
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_resource.h"
+
+uint32_t
+nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz)
+{
+ uint32_t tile_mode = 0x000;
+
+ if (ny > 64) tile_mode = 0x040; /* height 128 tiles */
+ else
+ if (ny > 32) tile_mode = 0x030; /* height 64 tiles */
+ else
+ if (ny > 16) tile_mode = 0x020; /* height 32 tiles */
+ else
+ if (ny > 8) tile_mode = 0x010; /* height 16 tiles */
+
+ if (nz == 1)
+ return tile_mode;
+ else
+ if (tile_mode > 0x020)
+ tile_mode = 0x020;
+
+ if (nz > 16 && tile_mode < 0x020)
+ return tile_mode | 0x500; /* depth 32 tiles */
+ if (nz > 8) return tile_mode | 0x400; /* depth 16 tiles */
+ if (nz > 4) return tile_mode | 0x300; /* depth 8 tiles */
+ if (nz > 2) return tile_mode | 0x200; /* depth 4 tiles */
+
+ return tile_mode | 0x100;
+}
+
+static uint32_t
+nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz)
+{
+ return nv50_tex_choose_tile_dims_helper(nx, ny * 2, nz);
+}
+
+static uint32_t
+nv50_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed)
+{
+ const unsigned ms = mt->ms_x + mt->ms_y;
+
+ uint32_t tile_flags;
+
+ if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR))
+ return 0;
+ if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR))
+ return 0;
+
+ switch (mt->base.base.format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ tile_flags = 0x6c + ms;
+ break;
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ tile_flags = 0x18 + ms;
+ break;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ tile_flags = 0x128 + ms;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ tile_flags = 0x40 + ms;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ tile_flags = 0x60 + ms;
+ break;
+ default:
+ switch (util_format_get_blocksizebits(mt->base.base.format)) {
+ case 128:
+ assert(ms < 3);
+ tile_flags = 0x74;
+ break;
+ case 64:
+ switch (ms) {
+ case 2: tile_flags = 0xfc; break;
+ case 3: tile_flags = 0xfd; break;
+ default:
+ tile_flags = 0x70;
+ break;
+ }
+ break;
+ case 32:
+ if (mt->base.base.bind & PIPE_BIND_SCANOUT) {
+ assert(ms == 0);
+ tile_flags = 0x7a;
+ } else {
+ switch (ms) {
+ case 2: tile_flags = 0xf8; break;
+ case 3: tile_flags = 0xf9; break;
+ default:
+ tile_flags = 0x70;
+ break;
+ }
+ }
+ break;
+ case 16:
+ case 8:
+ tile_flags = 0x70;
+ break;
+ default:
+ return 0;
+ }
+ if (mt->base.base.bind & PIPE_BIND_CURSOR)
+ tile_flags = 0;
+ }
+
+ if (!compressed)
+ tile_flags &= ~0x180;
+
+ return tile_flags;
+}
+
+void
+nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt)
+{
+ struct nv50_miptree *mt = nv50_miptree(pt);
+
+ nouveau_bo_ref(NULL, &mt->base.bo);
+
+ nouveau_fence_ref(NULL, &mt->base.fence);
+ nouveau_fence_ref(NULL, &mt->base.fence_wr);
+
+ NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_count, -1);
+ NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_bytes,
+ -(uint64_t)mt->total_size);
+
+ FREE(mt);
+}
+
+boolean
+nv50_miptree_get_handle(struct pipe_screen *pscreen,
+ struct pipe_resource *pt,
+ struct winsys_handle *whandle)
+{
+ struct nv50_miptree *mt = nv50_miptree(pt);
+ unsigned stride;
+
+ if (!mt || !mt->base.bo)
+ return FALSE;
+
+ stride = mt->level[0].pitch;
+
+ return nouveau_screen_bo_get_handle(pscreen,
+ mt->base.bo,
+ stride,
+ whandle);
+}
+
+const struct u_resource_vtbl nv50_miptree_vtbl =
+{
+ nv50_miptree_get_handle, /* get_handle */
+ nv50_miptree_destroy, /* resource_destroy */
+ nv50_miptree_transfer_map, /* transfer_map */
+ u_default_transfer_flush_region, /* transfer_flush_region */
+ nv50_miptree_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
+static INLINE boolean
+nv50_miptree_init_ms_mode(struct nv50_miptree *mt)
+{
+ switch (mt->base.base.nr_samples) {
+ case 8:
+ mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS8;
+ mt->ms_x = 2;
+ mt->ms_y = 1;
+ break;
+ case 4:
+ mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS4;
+ mt->ms_x = 1;
+ mt->ms_y = 1;
+ break;
+ case 2:
+ mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS2;
+ mt->ms_x = 1;
+ break;
+ case 1:
+ case 0:
+ mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1;
+ break;
+ default:
+ NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples);
+ return FALSE;
+ }
+ return TRUE;
+}
+
+boolean
+nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align)
+{
+ struct pipe_resource *pt = &mt->base.base;
+ const unsigned blocksize = util_format_get_blocksize(pt->format);
+ unsigned h = pt->height0;
+
+ if (util_format_is_depth_or_stencil(pt->format))
+ return FALSE;
+
+ if ((pt->last_level > 0) || (pt->depth0 > 1) || (pt->array_size > 1))
+ return FALSE;
+ if (mt->ms_x | mt->ms_y)
+ return FALSE;
+
+ mt->level[0].pitch = align(pt->width0 * blocksize, pitch_align);
+
+ /* Account for very generous prefetch (allocate size as if tiled). */
+ h = MAX2(h, 8);
+ h = util_next_power_of_two(h);
+
+ mt->total_size = mt->level[0].pitch * h;
+
+ return TRUE;
+}
+
+static void
+nv50_miptree_init_layout_video(struct nv50_miptree *mt)
+{
+ const struct pipe_resource *pt = &mt->base.base;
+ const unsigned blocksize = util_format_get_blocksize(pt->format);
+
+ assert(pt->last_level == 0);
+ assert(mt->ms_x == 0 && mt->ms_y == 0);
+ assert(!util_format_is_compressed(pt->format));
+
+ mt->layout_3d = pt->target == PIPE_TEXTURE_3D;
+
+ mt->level[0].tile_mode = 0x20;
+ mt->level[0].pitch = align(pt->width0 * blocksize, 64);
+ mt->total_size = align(pt->height0, 16) * mt->level[0].pitch * (mt->layout_3d ? pt->depth0 : 1);
+
+ if (pt->array_size > 1) {
+ mt->layer_stride = align(mt->total_size, NV50_TILE_SIZE(0x20));
+ mt->total_size = mt->layer_stride * pt->array_size;
+ }
+}
+
+static void
+nv50_miptree_init_layout_tiled(struct nv50_miptree *mt)
+{
+ struct pipe_resource *pt = &mt->base.base;
+ unsigned w, h, d, l;
+ const unsigned blocksize = util_format_get_blocksize(pt->format);
+
+ mt->layout_3d = pt->target == PIPE_TEXTURE_3D;
+
+ w = pt->width0 << mt->ms_x;
+ h = pt->height0 << mt->ms_y;
+
+ /* For 3D textures, a mipmap is spanned by all the layers, for array
+ * textures and cube maps, each layer contains its own mipmaps.
+ */
+ d = mt->layout_3d ? pt->depth0 : 1;
+
+ for (l = 0; l <= pt->last_level; ++l) {
+ struct nv50_miptree_level *lvl = &mt->level[l];
+ unsigned tsx, tsy, tsz;
+ unsigned nbx = util_format_get_nblocksx(pt->format, w);
+ unsigned nby = util_format_get_nblocksy(pt->format, h);
+
+ lvl->offset = mt->total_size;
+
+ lvl->tile_mode = nv50_tex_choose_tile_dims(nbx, nby, d);
+
+ tsx = NV50_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */
+ tsy = NV50_TILE_SIZE_Y(lvl->tile_mode);
+ tsz = NV50_TILE_SIZE_Z(lvl->tile_mode);
+
+ lvl->pitch = align(nbx * blocksize, tsx);
+
+ mt->total_size += lvl->pitch * align(nby, tsy) * align(d, tsz);
+
+ w = u_minify(w, 1);
+ h = u_minify(h, 1);
+ d = u_minify(d, 1);
+ }
+
+ if (pt->array_size > 1) {
+ mt->layer_stride = align(mt->total_size,
+ NV50_TILE_SIZE(mt->level[0].tile_mode));
+ mt->total_size = mt->layer_stride * pt->array_size;
+ }
+}
+
+struct pipe_resource *
+nv50_miptree_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ)
+{
+ struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+ struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
+ struct pipe_resource *pt = &mt->base.base;
+ int ret;
+ union nouveau_bo_config bo_config;
+ uint32_t bo_flags;
+
+ if (!mt)
+ return NULL;
+
+ mt->base.vtbl = &nv50_miptree_vtbl;
+ *pt = *templ;
+ pipe_reference_init(&pt->reference, 1);
+ pt->screen = pscreen;
+
+ if (pt->bind & PIPE_BIND_LINEAR)
+ pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR;
+
+ bo_config.nv50.memtype = nv50_mt_choose_storage_type(mt, TRUE);
+
+ if (!nv50_miptree_init_ms_mode(mt)) {
+ FREE(mt);
+ return NULL;
+ }
+
+ if (unlikely(pt->flags & NV50_RESOURCE_FLAG_VIDEO)) {
+ nv50_miptree_init_layout_video(mt);
+ if (pt->flags & NV50_RESOURCE_FLAG_NOALLOC) {
+ /* BO allocation done by client */
+ return pt;
+ }
+ } else
+ if (bo_config.nv50.memtype != 0) {
+ nv50_miptree_init_layout_tiled(mt);
+ } else
+ if (!nv50_miptree_init_layout_linear(mt, 64)) {
+ FREE(mt);
+ return NULL;
+ }
+ bo_config.nv50.tile_mode = mt->level[0].tile_mode;
+
+ bo_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP;
+ if (mt->base.base.bind & (PIPE_BIND_CURSOR | PIPE_BIND_DISPLAY_TARGET))
+ bo_flags |= NOUVEAU_BO_CONTIG;
+
+ ret = nouveau_bo_new(dev, bo_flags, 4096, mt->total_size, &bo_config,
+ &mt->base.bo);
+ if (ret) {
+ FREE(mt);
+ return NULL;
+ }
+ mt->base.domain = NOUVEAU_BO_VRAM;
+ mt->base.address = mt->base.bo->offset;
+
+ return pt;
+}
+
+struct pipe_resource *
+nv50_miptree_from_handle(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle)
+{
+ struct nv50_miptree *mt;
+ unsigned stride;
+
+ /* only supports 2D, non-mipmapped textures for the moment */
+ if ((templ->target != PIPE_TEXTURE_2D &&
+ templ->target != PIPE_TEXTURE_RECT) ||
+ templ->last_level != 0 ||
+ templ->depth0 != 1 ||
+ templ->array_size > 1)
+ return NULL;
+
+ mt = CALLOC_STRUCT(nv50_miptree);
+ if (!mt)
+ return NULL;
+
+ mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
+ if (mt->base.bo == NULL) {
+ FREE(mt);
+ return NULL;
+ }
+ mt->base.domain = NOUVEAU_BO_VRAM;
+ mt->base.address = mt->base.bo->offset;
+
+ mt->base.base = *templ;
+ mt->base.vtbl = &nv50_miptree_vtbl;
+ pipe_reference_init(&mt->base.base.reference, 1);
+ mt->base.base.screen = pscreen;
+ mt->level[0].pitch = stride;
+ mt->level[0].offset = 0;
+ mt->level[0].tile_mode = mt->base.bo->config.nv50.tile_mode;
+
+ /* no need to adjust bo reference count */
+ return &mt->base.base;
+}
+
+
+/* Offset of zslice @z from start of level @l. */
+INLINE unsigned
+nv50_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z)
+{
+ const struct pipe_resource *pt = &mt->base.base;
+
+ unsigned tds = NV50_TILE_SHIFT_Z(mt->level[l].tile_mode);
+ unsigned ths = NV50_TILE_SHIFT_Y(mt->level[l].tile_mode);
+
+ unsigned nby = util_format_get_nblocksy(pt->format,
+ u_minify(pt->height0, l));
+
+ /* to next 2D tile slice within a 3D tile */
+ unsigned stride_2d = NV50_TILE_SIZE_2D(mt->level[l].tile_mode);
+
+ /* to slice in the next (in z direction) 3D tile */
+ unsigned stride_3d = (align(nby, (1 << ths)) * mt->level[l].pitch) << tds;
+
+ return (z & ((1 << tds) - 1)) * stride_2d + (z >> tds) * stride_3d;
+}
+
+/* Surface functions.
+ */
+
+struct nv50_surface *
+nv50_surface_from_miptree(struct nv50_miptree *mt,
+ const struct pipe_surface *templ)
+{
+ struct pipe_surface *ps;
+ struct nv50_surface *ns = CALLOC_STRUCT(nv50_surface);
+ if (!ns)
+ return NULL;
+ ps = &ns->base;
+
+ pipe_reference_init(&ps->reference, 1);
+ pipe_resource_reference(&ps->texture, &mt->base.base);
+
+ ps->format = templ->format;
+ ps->writable = templ->writable;
+ ps->u.tex.level = templ->u.tex.level;
+ ps->u.tex.first_layer = templ->u.tex.first_layer;
+ ps->u.tex.last_layer = templ->u.tex.last_layer;
+
+ ns->width = u_minify(mt->base.base.width0, ps->u.tex.level);
+ ns->height = u_minify(mt->base.base.height0, ps->u.tex.level);
+ ns->depth = ps->u.tex.last_layer - ps->u.tex.first_layer + 1;
+ ns->offset = mt->level[templ->u.tex.level].offset;
+
+ /* comment says there are going to be removed, but they're used by the st */
+ ps->width = ns->width;
+ ps->height = ns->height;
+
+ ns->width <<= mt->ms_x;
+ ns->height <<= mt->ms_y;
+
+ return ns;
+}
+
+struct pipe_surface *
+nv50_miptree_surface_new(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *templ)
+{
+ struct nv50_miptree *mt = nv50_miptree(pt);
+ struct nv50_surface *ns = nv50_surface_from_miptree(mt, templ);
+ if (!ns)
+ return NULL;
+ ns->base.context = pipe;
+
+ if (ns->base.u.tex.first_layer) {
+ const unsigned l = ns->base.u.tex.level;
+ const unsigned z = ns->base.u.tex.first_layer;
+
+ if (mt->layout_3d) {
+ ns->offset += nv50_mt_zslice_offset(mt, l, z);
+
+ /* TODO: switch to depth 1 tiles; but actually this shouldn't happen */
+ if (ns->depth > 1 &&
+ (z & (NV50_TILE_SIZE_Z(mt->level[l].tile_mode) - 1)))
+ NOUVEAU_ERR("Creating unsupported 3D surface !\n");
+ } else {
+ ns->offset += mt->layer_stride * z;
+ }
+ }
+
+ return &ns->base;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
new file mode 100644
index 00000000000..73df71c61e2
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv50_program.h"
+#include "nv50/nv50_context.h"
+
+#include "codegen/nv50_ir_driver.h"
+
+static INLINE unsigned
+bitcount4(const uint32_t val)
+{
+ static const uint8_t cnt[16]
+ = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
+ return cnt[val & 0xf];
+}
+
+static int
+nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
+{
+ struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
+ unsigned i, n, c;
+
+ n = 0;
+ for (i = 0; i < info->numInputs; ++i) {
+ prog->in[i].id = i;
+ prog->in[i].sn = info->in[i].sn;
+ prog->in[i].si = info->in[i].si;
+ prog->in[i].hw = n;
+ prog->in[i].mask = info->in[i].mask;
+
+ prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);
+
+ for (c = 0; c < 4; ++c)
+ if (info->in[i].mask & (1 << c))
+ info->in[i].slot[c] = n++;
+ }
+ prog->in_nr = info->numInputs;
+
+ for (i = 0; i < info->numSysVals; ++i) {
+ switch (info->sv[i].sn) {
+ case TGSI_SEMANTIC_INSTANCEID:
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
+ continue;
+ case TGSI_SEMANTIC_VERTEXID:
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
+ prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12;
+ continue;
+ default:
+ break;
+ }
+ }
+
+ /*
+ * Corner case: VP has no inputs, but we will still need to submit data to
+ * draw it. HW will shout at us and won't draw anything if we don't enable
+ * any input, so let's just pretend it's the first one.
+ */
+ if (prog->vp.attrs[0] == 0 &&
+ prog->vp.attrs[1] == 0 &&
+ prog->vp.attrs[2] == 0)
+ prog->vp.attrs[0] |= 0xf;
+
+ /* VertexID before InstanceID */
+ if (info->io.vertexId < info->numSysVals)
+ info->sv[info->io.vertexId].slot[0] = n++;
+ if (info->io.instanceId < info->numSysVals)
+ info->sv[info->io.instanceId].slot[0] = n++;
+
+ n = 0;
+ for (i = 0; i < info->numOutputs; ++i) {
+ switch (info->out[i].sn) {
+ case TGSI_SEMANTIC_PSIZE:
+ prog->vp.psiz = i;
+ break;
+ case TGSI_SEMANTIC_CLIPDIST:
+ prog->vp.clpd[info->out[i].si] = n;
+ break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ prog->vp.edgeflag = i;
+ break;
+ case TGSI_SEMANTIC_BCOLOR:
+ prog->vp.bfc[info->out[i].si] = i;
+ break;
+ default:
+ break;
+ }
+ prog->out[i].id = i;
+ prog->out[i].sn = info->out[i].sn;
+ prog->out[i].si = info->out[i].si;
+ prog->out[i].hw = n;
+ prog->out[i].mask = info->out[i].mask;
+
+ for (c = 0; c < 4; ++c)
+ if (info->out[i].mask & (1 << c))
+ info->out[i].slot[c] = n++;
+ }
+ prog->out_nr = info->numOutputs;
+ prog->max_out = n;
+
+ if (prog->vp.psiz < info->numOutputs)
+ prog->vp.psiz = prog->out[prog->vp.psiz].hw;
+
+ return 0;
+}
+
+static int
+nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info)
+{
+ struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
+ unsigned i, n, m, c;
+ unsigned nvary;
+ unsigned nflat;
+ unsigned nintp = 0;
+
+ /* count recorded non-flat inputs */
+ for (m = 0, i = 0; i < info->numInputs; ++i) {
+ switch (info->in[i].sn) {
+ case TGSI_SEMANTIC_POSITION:
+ case TGSI_SEMANTIC_FACE:
+ continue;
+ default:
+ m += info->in[i].flat ? 0 : 1;
+ break;
+ }
+ }
+ /* careful: id may be != i in info->in[prog->in[i].id] */
+
+ /* Fill prog->in[] so that non-flat inputs are first and
+ * kick out special inputs that don't use the RESULT_MAP.
+ */
+ for (n = 0, i = 0; i < info->numInputs; ++i) {
+ if (info->in[i].sn == TGSI_SEMANTIC_POSITION) {
+ prog->fp.interp |= info->in[i].mask << 24;
+ for (c = 0; c < 4; ++c)
+ if (info->in[i].mask & (1 << c))
+ info->in[i].slot[c] = nintp++;
+ } else
+ if (info->in[i].sn == TGSI_SEMANTIC_FACE) {
+ info->in[i].slot[0] = 255;
+ } else {
+ unsigned j = info->in[i].flat ? m++ : n++;
+
+ if (info->in[i].sn == TGSI_SEMANTIC_COLOR)
+ prog->vp.bfc[info->in[i].si] = j;
+
+ prog->in[j].id = i;
+ prog->in[j].mask = info->in[i].mask;
+ prog->in[j].sn = info->in[i].sn;
+ prog->in[j].si = info->in[i].si;
+ prog->in[j].linear = info->in[i].linear;
+
+ prog->in_nr++;
+ }
+ }
+ if (!(prog->fp.interp & (8 << 24))) {
+ ++nintp;
+ prog->fp.interp |= 8 << 24;
+ }
+
+ for (i = 0; i < prog->in_nr; ++i) {
+ int j = prog->in[i].id;
+
+ prog->in[i].hw = nintp;
+ for (c = 0; c < 4; ++c)
+ if (prog->in[i].mask & (1 << c))
+ info->in[j].slot[c] = nintp++;
+ }
+ /* (n == m) if m never increased, i.e. no flat inputs */
+ nflat = (n < m) ? (nintp - prog->in[n].hw) : 0;
+ nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */
+ nvary = nintp - nflat;
+
+ prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
+ prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;
+
+ /* put front/back colors right after HPOS */
+ prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT;
+ for (i = 0; i < 2; ++i)
+ if (prog->vp.bfc[i] < 0xff)
+ prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16;
+
+ /* FP outputs */
+
+ if (info->prop.fp.numColourResults > 1)
+ prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;
+
+ for (i = 0; i < info->numOutputs; ++i) {
+ prog->out[i].id = i;
+ prog->out[i].sn = info->out[i].sn;
+ prog->out[i].si = info->out[i].si;
+ prog->out[i].mask = info->out[i].mask;
+
+ if (i == info->io.fragDepth || i == info->io.sampleMask)
+ continue;
+ prog->out[i].hw = info->out[i].si * 4;
+
+ for (c = 0; c < 4; ++c)
+ info->out[i].slot[c] = prog->out[i].hw + c;
+
+ prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4);
+ }
+
+ if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+ info->out[info->io.sampleMask].slot[0] = prog->max_out++;
+
+ if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
+ info->out[info->io.fragDepth].slot[2] = prog->max_out++;
+
+ if (!prog->max_out)
+ prog->max_out = 4;
+
+ return 0;
+}
+
+static int
+nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
+{
+ switch (info->type) {
+ case PIPE_SHADER_VERTEX:
+ return nv50_vertprog_assign_slots(info);
+ case PIPE_SHADER_GEOMETRY:
+ return nv50_vertprog_assign_slots(info);
+ case PIPE_SHADER_FRAGMENT:
+ return nv50_fragprog_assign_slots(info);
+ default:
+ return -1;
+ }
+}
+
+static struct nv50_stream_output_state *
+nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
+ const struct pipe_stream_output_info *pso)
+{
+ struct nv50_stream_output_state *so;
+ unsigned b, i, c;
+ unsigned base[4];
+
+ so = MALLOC_STRUCT(nv50_stream_output_state);
+ if (!so)
+ return NULL;
+ memset(so->map, 0xff, sizeof(so->map));
+
+ for (b = 0; b < 4; ++b)
+ so->num_attribs[b] = 0;
+ for (i = 0; i < pso->num_outputs; ++i) {
+ unsigned end = pso->output[i].dst_offset + pso->output[i].num_components;
+ b = pso->output[i].output_buffer;
+ assert(b < 4);
+ so->num_attribs[b] = MAX2(so->num_attribs[b], end);
+ }
+
+ so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED;
+
+ so->stride[0] = pso->stride[0] * 4;
+ base[0] = 0;
+ for (b = 1; b < 4; ++b) {
+ assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]);
+ so->stride[b] = so->num_attribs[b] * 4;
+ if (so->num_attribs[b])
+ so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT;
+ base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4);
+ }
+ if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) {
+ assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX);
+ so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT;
+ }
+
+ so->map_size = base[3] + so->num_attribs[3];
+
+ for (i = 0; i < pso->num_outputs; ++i) {
+ const unsigned s = pso->output[i].start_component;
+ const unsigned p = pso->output[i].dst_offset;
+ const unsigned r = pso->output[i].register_index;
+ b = pso->output[i].output_buffer;
+
+ for (c = 0; c < pso->output[i].num_components; ++c)
+ so->map[base[b] + p + c] = info->out[r].slot[s + c];
+ }
+
+ return so;
+}
+
+boolean
+nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
+{
+ struct nv50_ir_prog_info *info;
+ int ret;
+ const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;
+
+ info = CALLOC_STRUCT(nv50_ir_prog_info);
+ if (!info)
+ return FALSE;
+
+ info->type = prog->type;
+ info->target = chipset;
+ info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
+ info->bin.source = (void *)prog->pipe.tokens;
+
+ info->io.ucpCBSlot = 15;
+ info->io.ucpBase = 0;
+ info->io.genUserClip = prog->vp.clpd_nr;
+
+ info->assignSlots = nv50_program_assign_varying_slots;
+
+ prog->vp.bfc[0] = 0xff;
+ prog->vp.bfc[1] = 0xff;
+ prog->vp.edgeflag = 0xff;
+ prog->vp.clpd[0] = map_undef;
+ prog->vp.clpd[1] = map_undef;
+ prog->vp.psiz = map_undef;
+ prog->gp.primid = 0x80;
+
+ info->driverPriv = prog;
+
+#ifdef DEBUG
+ info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
+ info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
+#else
+ info->optLevel = 3;
+#endif
+
+ ret = nv50_ir_generate_code(info);
+ if (ret) {
+ NOUVEAU_ERR("shader translation failed: %i\n", ret);
+ goto out;
+ }
+ FREE(info->bin.syms);
+
+ prog->code = info->bin.code;
+ prog->code_size = info->bin.codeSize;
+ prog->fixups = info->bin.relocData;
+ prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
+ prog->tls_space = info->bin.tlsSpace;
+
+ if (prog->type == PIPE_SHADER_FRAGMENT) {
+ if (info->prop.fp.writesDepth) {
+ prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
+ prog->fp.flags[1] = 0x11;
+ }
+ if (info->prop.fp.usesDiscard)
+ prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
+ }
+
+ if (prog->pipe.stream_output.num_outputs)
+ prog->so = nv50_program_create_strmout_state(info,
+ &prog->pipe.stream_output);
+
+out:
+ FREE(info);
+ return !ret;
+}
+
+boolean
+nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
+{
+ struct nouveau_heap *heap;
+ int ret;
+ uint32_t size = align(prog->code_size, 0x40);
+
+ switch (prog->type) {
+ case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break;
+ case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break;
+ case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break;
+ default:
+ assert(!"invalid program type");
+ return FALSE;
+ }
+
+ ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
+ if (ret) {
+ /* Out of space: evict everything to compactify the code segment, hoping
+ * the working set is much smaller and drifts slowly. Improve me !
+ */
+ while (heap->next) {
+ struct nv50_program *evict = heap->next->priv;
+ if (evict)
+ nouveau_heap_free(&evict->mem);
+ }
+ debug_printf("WARNING: out of code space, evicting all shaders.\n");
+ ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
+ if (ret) {
+ NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
+ return FALSE;
+ }
+ }
+ prog->code_base = prog->mem->start;
+
+ ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
+ if (ret < 0)
+ return FALSE;
+ if (ret > 0)
+ nv50->state.new_tls_space = TRUE;
+
+ if (prog->fixups)
+ nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
+
+ nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
+ (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
+ NOUVEAU_BO_VRAM, prog->code_size, prog->code);
+
+ BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
+ PUSH_DATA (nv50->base.pushbuf, 0);
+
+ return TRUE;
+}
+
+void
+nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
+{
+ const struct pipe_shader_state pipe = p->pipe;
+ const ubyte type = p->type;
+
+ if (p->mem)
+ nouveau_heap_free(&p->mem);
+
+ FREE(p->code);
+
+ FREE(p->fixups);
+
+ FREE(p->so);
+
+ memset(p, 0, sizeof(*p));
+
+ p->pipe = pipe;
+ p->type = type;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
new file mode 100644
index 00000000000..13b9516a3e4
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2010 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NV50_PROG_H__
+#define __NV50_PROG_H__
+
+struct nv50_context;
+
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+
+struct nv50_varying {
+ uint8_t id; /* tgsi index */
+ uint8_t hw; /* hw index, nv50 wants flat FP inputs last */
+
+ unsigned mask : 4;
+ unsigned linear : 1;
+ unsigned pad : 3;
+
+ ubyte sn; /* semantic name */
+ ubyte si; /* semantic index */
+};
+
+struct nv50_stream_output_state
+{
+ uint32_t ctrl;
+ uint16_t stride[4];
+ uint8_t num_attribs[4];
+ uint8_t map_size;
+ uint8_t map[128];
+};
+
+struct nv50_program {
+ struct pipe_shader_state pipe;
+
+ ubyte type;
+ boolean translated;
+
+ uint32_t *code;
+ unsigned code_size;
+ unsigned code_base;
+ uint32_t *immd;
+ unsigned immd_size;
+ unsigned parm_size; /* size limit of uniform buffer */
+ uint32_t tls_space; /* required local memory per thread */
+
+ ubyte max_gpr; /* REG_ALLOC_TEMP */
+ ubyte max_out; /* REG_ALLOC_RESULT or FP_RESULT_COUNT */
+
+ ubyte in_nr;
+ ubyte out_nr;
+ struct nv50_varying in[16];
+ struct nv50_varying out[16];
+
+ struct {
+ uint32_t attrs[3]; /* VP_ATTR_EN_0,1 and VP_GP_BUILTIN_ATTR_EN */
+ ubyte psiz; /* output slot of point size */
+ ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */
+ ubyte edgeflag;
+ ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */
+ ubyte clpd_nr;
+ } vp;
+
+ struct {
+ uint32_t flags[2]; /* 0x19a8, 196c */
+ uint32_t interp; /* 0x1988 */
+ uint32_t colors; /* 0x1904 */
+ } fp;
+
+ struct {
+ ubyte primid; /* primitive id output register */
+ uint8_t vert_count;
+ uint8_t prim_type; /* point, line strip or tri strip */
+ } gp;
+
+ void *fixups; /* relocation records */
+
+ struct nouveau_heap *mem;
+
+ struct nv50_stream_output_state *so;
+};
+
+boolean nv50_program_translate(struct nv50_program *, uint16_t chipset);
+boolean nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
+void nv50_program_destroy(struct nv50_context *, struct nv50_program *);
+
+#endif /* __NV50_PROG_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c
new file mode 100644
index 00000000000..3e9a4096cf0
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c
@@ -0,0 +1,309 @@
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_resource.h"
+
+#include "nv50/nv50_3d.xml.h"
+
+struct push_context {
+ struct nouveau_pushbuf *push;
+
+ const void *idxbuf;
+
+ float edgeflag;
+ int edgeflag_attr;
+
+ uint32_t vertex_words;
+ uint32_t packet_vertex_limit;
+
+ struct translate *translate;
+
+ boolean primitive_restart;
+ uint32_t prim;
+ uint32_t restart_index;
+ uint32_t instance_id;
+};
+
+static INLINE unsigned
+prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static INLINE unsigned
+prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static INLINE unsigned
+prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static void
+emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint8_t *elts = (uint8_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i08(elts, push, ctx->restart_index);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id,
+ ctx->push->cur);
+
+ ctx->push->cur += size;
+ count -= nr;
+ elts += nr;
+
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (ctx->push, ctx->restart_index);
+ }
+ }
+}
+
+static void
+emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint16_t *elts = (uint16_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i16(elts, push, ctx->restart_index);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id,
+ ctx->push->cur);
+
+ ctx->push->cur += size;
+ count -= nr;
+ elts += nr;
+
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (ctx->push, ctx->restart_index);
+ }
+ }
+}
+
+static void
+emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint32_t *elts = (uint32_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i32(elts, push, ctx->restart_index);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id,
+ ctx->push->cur);
+
+ ctx->push->cur += size;
+ count -= nr;
+ elts += nr;
+
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_NV04(ctx->push, NV50_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (ctx->push, ctx->restart_index);
+ }
+ }
+}
+
+static void
+emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
+{
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size = ctx->vertex_words * push;
+
+ BEGIN_NI04(ctx->push, NV50_3D(VERTEX_DATA), size);
+
+ ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id,
+ ctx->push->cur);
+ ctx->push->cur += size;
+ count -= push;
+ start += push;
+ }
+}
+
+
+#define NV50_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nv50_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NV50_PRIM_GL_CASE(POINTS);
+ NV50_PRIM_GL_CASE(LINES);
+ NV50_PRIM_GL_CASE(LINE_LOOP);
+ NV50_PRIM_GL_CASE(LINE_STRIP);
+ NV50_PRIM_GL_CASE(TRIANGLES);
+ NV50_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NV50_PRIM_GL_CASE(TRIANGLE_FAN);
+ NV50_PRIM_GL_CASE(QUADS);
+ NV50_PRIM_GL_CASE(QUAD_STRIP);
+ NV50_PRIM_GL_CASE(POLYGON);
+ NV50_PRIM_GL_CASE(LINES_ADJACENCY);
+ NV50_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NV50_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NV50_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ /*
+ NV50_PRIM_GL_CASE(PATCHES); */
+ default:
+ return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ break;
+ }
+}
+
+void
+nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
+{
+ struct push_context ctx;
+ unsigned i, index_size;
+ unsigned inst_count = info->instance_count;
+ unsigned vert_count = info->count;
+ boolean apply_bias = info->indexed && info->index_bias;
+
+ ctx.push = nv50->base.pushbuf;
+ ctx.translate = nv50->vertex->translate;
+ ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit;
+ ctx.vertex_words = nv50->vertex->vertex_size;
+
+ for (i = 0; i < nv50->num_vtxbufs; ++i) {
+ const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
+ const uint8_t *data;
+
+ if (unlikely(vb->buffer))
+ data = nouveau_resource_map_offset(&nv50->base,
+ nv04_resource(vb->buffer), vb->buffer_offset, NOUVEAU_BO_RD);
+ else
+ data = vb->user_buffer;
+
+ if (apply_bias && likely(!(nv50->vertex->instance_bufs & (1 << i))))
+ data += (ptrdiff_t)info->index_bias * vb->stride;
+
+ ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
+ }
+
+ if (info->indexed) {
+ if (nv50->idxbuf.buffer) {
+ ctx.idxbuf = nouveau_resource_map_offset(&nv50->base,
+ nv04_resource(nv50->idxbuf.buffer), nv50->idxbuf.offset,
+ NOUVEAU_BO_RD);
+ } else {
+ ctx.idxbuf = nv50->idxbuf.user_buffer;
+ }
+ if (!ctx.idxbuf)
+ return;
+ index_size = nv50->idxbuf.index_size;
+ ctx.primitive_restart = info->primitive_restart;
+ ctx.restart_index = info->restart_index;
+ } else {
+ if (unlikely(info->count_from_stream_output)) {
+ struct pipe_context *pipe = &nv50->base.pipe;
+ struct nv50_so_target *targ;
+ targ = nv50_so_target(info->count_from_stream_output);
+ if (!targ->pq) {
+ NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
+ return;
+ }
+ pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
+ vert_count /= targ->stride;
+ }
+ ctx.idxbuf = NULL;
+ index_size = 0;
+ ctx.primitive_restart = FALSE;
+ ctx.restart_index = 0;
+ }
+
+ ctx.instance_id = info->start_instance;
+ ctx.prim = nv50_prim_gl(info->mode);
+
+ if (info->primitive_restart) {
+ BEGIN_NV04(ctx.push, NV50_3D(PRIM_RESTART_ENABLE), 2);
+ PUSH_DATA (ctx.push, 1);
+ PUSH_DATA (ctx.push, info->restart_index);
+ } else
+ if (nv50->state.prim_restart) {
+ BEGIN_NV04(ctx.push, NV50_3D(PRIM_RESTART_ENABLE), 1);
+ PUSH_DATA (ctx.push, 0);
+ }
+ nv50->state.prim_restart = info->primitive_restart;
+
+ while (inst_count--) {
+ BEGIN_NV04(ctx.push, NV50_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (ctx.push, ctx.prim);
+ switch (index_size) {
+ case 0:
+ emit_vertices_seq(&ctx, info->start, vert_count);
+ break;
+ case 1:
+ emit_vertices_i08(&ctx, info->start, vert_count);
+ break;
+ case 2:
+ emit_vertices_i16(&ctx, info->start, vert_count);
+ break;
+ case 4:
+ emit_vertices_i32(&ctx, info->start, vert_count);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ BEGIN_NV04(ctx.push, NV50_3D(VERTEX_END_GL), 1);
+ PUSH_DATA (ctx.push, 0);
+
+ ctx.instance_id++;
+ ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
new file mode 100644
index 00000000000..6f25a0822c4
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -0,0 +1,399 @@
+/*
+ * Copyright 2011 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christoph Bumiller
+ */
+
+#define NV50_PUSH_EXPLICIT_SPACE_CHECKING
+
+#include "nv50/nv50_context.h"
+#include "nv_object.xml.h"
+
+/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
+ * (since we use only a single GPU channel per screen) will not work properly.
+ *
+ * The first is not that big of an issue because OpenGL does not allow nested
+ * queries anyway.
+ */
+
+struct nv50_query {
+ uint32_t *data;
+ uint16_t type;
+ uint16_t index;
+ uint32_t sequence;
+ struct nouveau_bo *bo;
+ uint32_t base;
+ uint32_t offset; /* base + i * 16 */
+ boolean ready;
+ boolean flushed;
+ boolean is64bit;
+ struct nouveau_mm_allocation *mm;
+};
+
+#define NV50_QUERY_ALLOC_SPACE 128
+
+static INLINE struct nv50_query *
+nv50_query(struct pipe_query *pipe)
+{
+ return (struct nv50_query *)pipe;
+}
+
+static boolean
+nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
+{
+ struct nv50_screen *screen = nv50->screen;
+ int ret;
+
+ if (q->bo) {
+ nouveau_bo_ref(NULL, &q->bo);
+ if (q->mm) {
+ if (q->ready)
+ nouveau_mm_free(q->mm);
+ else
+ nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work,
+ q->mm);
+ }
+ }
+ if (size) {
+ q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
+ if (!q->bo)
+ return FALSE;
+ q->offset = q->base;
+
+ ret = nouveau_bo_map(q->bo, 0, screen->base.client);
+ if (ret) {
+ nv50_query_allocate(nv50, q, 0);
+ return FALSE;
+ }
+ q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
+ }
+ return TRUE;
+}
+
+static void
+nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
+ FREE(nv50_query(pq));
+}
+
+static struct pipe_query *
+nv50_query_create(struct pipe_context *pipe, unsigned type)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_query *q;
+
+ q = CALLOC_STRUCT(nv50_query);
+ if (!q)
+ return NULL;
+
+ if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) {
+ FREE(q);
+ return NULL;
+ }
+
+ q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
+ type == PIPE_QUERY_PRIMITIVES_EMITTED ||
+ type == PIPE_QUERY_SO_STATISTICS);
+ q->type = type;
+
+ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+ q->offset -= 16;
+ q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
+ }
+
+ return (struct pipe_query *)q;
+}
+
+static void
+nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
+ unsigned offset, uint32_t get)
+{
+ offset += q->offset;
+
+ PUSH_SPACE(push, 5);
+ PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, q->bo->offset + offset);
+ PUSH_DATA (push, q->bo->offset + offset);
+ PUSH_DATA (push, q->sequence);
+ PUSH_DATA (push, get);
+}
+
+static void
+nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_query *q = nv50_query(pq);
+
+ /* For occlusion queries we have to change the storage, because a previous
+ * query might set the initial render conition to FALSE even *after* we re-
+ * initialized it to TRUE.
+ */
+ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+ q->offset += 16;
+ q->data += 16 / sizeof(*q->data);
+ if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE)
+ nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE);
+
+ /* XXX: can we do this with the GPU, and sync with respect to a previous
+ * query ?
+ */
+ q->data[1] = 1; /* initial render condition = TRUE */
+ }
+ if (!q->is64bit)
+ q->data[0] = q->sequence++; /* the previously used one */
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ PUSH_SPACE(push, 4);
+ BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
+ PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nv50_query_get(push, q, 0x10, 0x06805002);
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nv50_query_get(push, q, 0x10, 0x05805002);
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nv50_query_get(push, q, 0x20, 0x05805002);
+ nv50_query_get(push, q, 0x30, 0x06805002);
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ nv50_query_get(push, q, 0x10, 0x00005002);
+ break;
+ default:
+ break;
+ }
+ q->ready = FALSE;
+}
+
+static void
+nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_query *q = nv50_query(pq);
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ nv50_query_get(push, q, 0, 0x0100f002);
+ PUSH_SPACE(push, 2);
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nv50_query_get(push, q, 0, 0x06805002);
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nv50_query_get(push, q, 0, 0x05805002);
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nv50_query_get(push, q, 0x00, 0x05805002);
+ nv50_query_get(push, q, 0x10, 0x06805002);
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ q->sequence++;
+ /* fall through */
+ case PIPE_QUERY_TIME_ELAPSED:
+ nv50_query_get(push, q, 0, 0x00005002);
+ break;
+ case PIPE_QUERY_GPU_FINISHED:
+ q->sequence++;
+ nv50_query_get(push, q, 0, 0x1000f010);
+ break;
+ case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+ nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ q->ready = q->flushed = FALSE;
+}
+
+static INLINE boolean
+nv50_query_ready(struct nv50_query *q)
+{
+ return q->ready || (!q->is64bit && (q->data[0] == q->sequence));
+}
+
+static boolean
+nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, union pipe_query_result *result)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nv50_query *q = nv50_query(pq);
+ uint64_t *res64 = (uint64_t *)result;
+ uint32_t *res32 = (uint32_t *)result;
+ boolean *res8 = (boolean *)result;
+ uint64_t *data64 = (uint64_t *)q->data;
+
+ if (!q->ready) /* update ? */
+ q->ready = nv50_query_ready(q);
+ if (!q->ready) {
+ if (!wait) {
+ /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
+ if (!q->flushed) {
+ q->flushed = TRUE;
+ PUSH_KICK(nv50->base.pushbuf);
+ }
+ return FALSE;
+ }
+ if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
+ return FALSE;
+ }
+ q->ready = TRUE;
+
+ switch (q->type) {
+ case PIPE_QUERY_GPU_FINISHED:
+ res8[0] = TRUE;
+ break;
+ case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
+ res64[0] = q->data[1];
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
+ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
+ res64[0] = data64[0] - data64[2];
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ res64[0] = data64[0] - data64[4];
+ res64[1] = data64[2] - data64[6];
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ res64[0] = data64[1];
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ res64[0] = 1000000000;
+ res8[8] = FALSE;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ res64[0] = data64[1] - data64[3];
+ break;
+ case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+ res32[0] = q->data[1];
+ break;
+ default:
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+void
+nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
+{
+ struct nv50_query *q = nv50_query(pq);
+ unsigned offset = q->offset;
+
+ PUSH_SPACE(push, 5);
+ PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, q->bo->offset + offset);
+ PUSH_DATA (push, q->bo->offset + offset);
+ PUSH_DATA (push, q->sequence);
+ PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
+}
+
+static void
+nv50_render_condition(struct pipe_context *pipe,
+ struct pipe_query *pq,
+ boolean condition, uint mode)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_query *q;
+
+ nv50->cond_query = pq;
+ nv50->cond_cond = condition;
+ nv50->cond_mode = mode;
+
+ PUSH_SPACE(push, 6);
+
+ if (!pq) {
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+ return;
+ }
+ q = nv50_query(pq);
+
+ if (mode == PIPE_RENDER_COND_WAIT ||
+ mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
+ BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, q->bo->offset + q->offset);
+ PUSH_DATA (push, q->bo->offset + q->offset);
+ PUSH_DATA (push, NV50_3D_COND_MODE_RES_NON_ZERO);
+}
+
+void
+nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
+ struct pipe_query *pq, unsigned result_offset)
+{
+ struct nv50_query *q = nv50_query(pq);
+
+ /* XXX: does this exist ? */
+#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))
+
+ nouveau_pushbuf_space(push, 0, 0, 1);
+ nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
+ NV50_IB_ENTRY_1_NO_PREFETCH);
+}
+
+void
+nva0_so_target_save_offset(struct pipe_context *pipe,
+ struct pipe_stream_output_target *ptarg,
+ unsigned index, boolean serialize)
+{
+ struct nv50_so_target *targ = nv50_so_target(ptarg);
+
+ if (serialize) {
+ struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
+ PUSH_SPACE(push, 2);
+ BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ nv50_query(targ->pq)->index = index;
+ nv50_query_end(pipe, targ->pq);
+}
+
+void
+nv50_init_query_functions(struct nv50_context *nv50)
+{
+ struct pipe_context *pipe = &nv50->base.pipe;
+
+ pipe->create_query = nv50_query_create;
+ pipe->destroy_query = nv50_query_destroy;
+ pipe->begin_query = nv50_query_begin;
+ pipe->end_query = nv50_query_end;
+ pipe->get_query_result = nv50_query_result;
+ pipe->render_condition = nv50_render_condition;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.c b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
new file mode 100644
index 00000000000..7fbb0a92bf6
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.c
@@ -0,0 +1,104 @@
+
+#include "pipe/p_context.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nouveau_screen.h"
+
+#include "nv50/nv50_resource.h"
+
+static struct pipe_resource *
+nv50_resource_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ)
+{
+ switch (templ->target) {
+ case PIPE_BUFFER:
+ return nouveau_buffer_create(screen, templ);
+ default:
+ return nv50_miptree_create(screen, templ);
+ }
+}
+
+static struct pipe_resource *
+nv50_resource_from_handle(struct pipe_screen * screen,
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle)
+{
+ if (templ->target == PIPE_BUFFER)
+ return NULL;
+ else
+ return nv50_miptree_from_handle(screen, templ, whandle);
+}
+
+struct pipe_surface *
+nv50_surface_from_buffer(struct pipe_context *pipe,
+ struct pipe_resource *pbuf,
+ const struct pipe_surface *templ)
+{
+ struct nv50_surface *sf = CALLOC_STRUCT(nv50_surface);
+ if (!sf)
+ return NULL;
+
+ pipe_reference_init(&sf->base.reference, 1);
+ pipe_resource_reference(&sf->base.texture, pbuf);
+
+ sf->base.format = templ->format;
+ sf->base.writable = templ->writable;
+ sf->base.u.buf.first_element = templ->u.buf.first_element;
+ sf->base.u.buf.last_element = templ->u.buf.last_element;
+
+ sf->offset =
+ templ->u.buf.first_element * util_format_get_blocksize(sf->base.format);
+
+ sf->offset &= ~0x7f; /* FIXME: RT_ADDRESS requires 128 byte alignment */
+
+ sf->width = templ->u.buf.last_element - templ->u.buf.first_element + 1;
+ sf->height = 1;
+ sf->depth = 1;
+
+ sf->base.width = sf->width;
+ sf->base.height = sf->height;
+
+ sf->base.context = pipe;
+ return &sf->base;
+}
+
+static struct pipe_surface *
+nv50_surface_create(struct pipe_context *pipe,
+ struct pipe_resource *pres,
+ const struct pipe_surface *templ)
+{
+ if (unlikely(pres->target == PIPE_BUFFER))
+ return nv50_surface_from_buffer(pipe, pres, templ);
+ return nv50_miptree_surface_new(pipe, pres, templ);
+}
+
+void
+nv50_surface_destroy(struct pipe_context *pipe, struct pipe_surface *ps)
+{
+ struct nv50_surface *s = nv50_surface(ps);
+
+ pipe_resource_reference(&ps->texture, NULL);
+
+ FREE(s);
+}
+
+void
+nv50_init_resource_functions(struct pipe_context *pcontext)
+{
+ pcontext->transfer_map = u_transfer_map_vtbl;
+ pcontext->transfer_flush_region = u_transfer_flush_region_vtbl;
+ pcontext->transfer_unmap = u_transfer_unmap_vtbl;
+ pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
+ pcontext->create_surface = nv50_surface_create;
+ pcontext->surface_destroy = nv50_surface_destroy;
+}
+
+void
+nv50_screen_init_resource_functions(struct pipe_screen *pscreen)
+{
+ pscreen->resource_create = nv50_resource_create;
+ pscreen->resource_from_handle = nv50_resource_from_handle;
+ pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+ pscreen->resource_destroy = u_resource_destroy_vtbl;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
new file mode 100644
index 00000000000..c06daa31c5d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
@@ -0,0 +1,153 @@
+
+#ifndef __NV50_RESOURCE_H__
+#define __NV50_RESOURCE_H__
+
+#include "util/u_transfer.h"
+#include "util/u_double_list.h"
+
+#include "nouveau_winsys.h"
+#include "nouveau_buffer.h"
+
+#ifndef __NVC0_RESOURCE_H__ /* make sure we don't use these in nvc0: */
+
+void
+nv50_init_resource_functions(struct pipe_context *pcontext);
+
+void
+nv50_screen_init_resource_functions(struct pipe_screen *pscreen);
+
+#define NV50_RESOURCE_FLAG_VIDEO (NOUVEAU_RESOURCE_FLAG_DRV_PRIV << 0)
+#define NV50_RESOURCE_FLAG_NOALLOC (NOUVEAU_RESOURCE_FLAG_DRV_PRIV << 1)
+
+#define NV50_TILE_SHIFT_X(m) 6
+#define NV50_TILE_SHIFT_Y(m) ((((m) >> 4) & 0xf) + 2)
+#define NV50_TILE_SHIFT_Z(m) ((((m) >> 8) & 0xf) + 0)
+
+#define NV50_TILE_SIZE_X(m) 64
+#define NV50_TILE_SIZE_Y(m) ( 4 << (((m) >> 4) & 0xf))
+#define NV50_TILE_SIZE_Z(m) ( 1 << (((m) >> 8) & 0xf))
+
+#define NV50_TILE_SIZE_2D(m) (NV50_TILE_SIZE_X(m) << NV50_TILE_SHIFT_Y(m))
+
+#define NV50_TILE_SIZE(m) (NV50_TILE_SIZE_2D(m) << NV50_TILE_SHIFT_Z(m))
+
+#endif /* __NVC0_RESOURCE_H__ */
+
+uint32_t
+nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz);
+
+struct nv50_miptree_level {
+ uint32_t offset;
+ uint32_t pitch;
+ uint32_t tile_mode;
+};
+
+#define NV50_MAX_TEXTURE_LEVELS 16
+
+struct nv50_miptree {
+ struct nv04_resource base;
+ struct nv50_miptree_level level[NV50_MAX_TEXTURE_LEVELS];
+ uint32_t total_size;
+ uint32_t layer_stride;
+ boolean layout_3d; /* TRUE if layer count varies with mip level */
+ uint8_t ms_x; /* log2 of number of samples in x/y dimension */
+ uint8_t ms_y;
+ uint8_t ms_mode;
+};
+
+static INLINE struct nv50_miptree *
+nv50_miptree(struct pipe_resource *pt)
+{
+ return (struct nv50_miptree *)pt;
+}
+
+
+#define NV50_TEXVIEW_SCALED_COORDS (1 << 0)
+#define NV50_TEXVIEW_FILTER_MSAA8 (1 << 1)
+#define NV50_TEXVIEW_ACCESS_RESOLVE (1 << 2)
+
+
+/* Internal functions:
+ */
+boolean
+nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align);
+
+struct pipe_resource *
+nv50_miptree_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *tmp);
+
+void
+nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt);
+
+struct pipe_resource *
+nv50_miptree_from_handle(struct pipe_screen *pscreen,
+ const struct pipe_resource *template,
+ struct winsys_handle *whandle);
+
+boolean
+nv50_miptree_get_handle(struct pipe_screen *pscreen,
+ struct pipe_resource *pt,
+ struct winsys_handle *whandle);
+
+struct nv50_surface {
+ struct pipe_surface base;
+ uint32_t offset;
+ uint32_t width;
+ uint16_t height;
+ uint16_t depth;
+};
+
+static INLINE struct nv50_surface *
+nv50_surface(struct pipe_surface *ps)
+{
+ return (struct nv50_surface *)ps;
+}
+
+static INLINE enum pipe_format
+nv50_zs_to_s_format(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT: return PIPE_FORMAT_X24S8_UINT;
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM: return PIPE_FORMAT_S8X24_UINT;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return PIPE_FORMAT_X32_S8X24_UINT;
+ default:
+ return format;
+ }
+}
+
+#ifndef __NVC0_RESOURCE_H__
+
+unsigned
+nv50_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z);
+
+struct pipe_surface *
+nv50_miptree_surface_new(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_surface *templ);
+
+void *
+nv50_miptree_transfer_map(struct pipe_context *pctx,
+ struct pipe_resource *res,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **ptransfer);
+void
+nv50_miptree_transfer_unmap(struct pipe_context *pcontext,
+ struct pipe_transfer *ptx);
+
+#endif /* __NVC0_RESOURCE_H__ */
+
+struct nv50_surface *
+nv50_surface_from_miptree(struct nv50_miptree *mt,
+ const struct pipe_surface *templ);
+
+struct pipe_surface *
+nv50_surface_from_buffer(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *templ);
+
+void
+nv50_surface_destroy(struct pipe_context *, struct pipe_surface *);
+
+#endif /* __NV50_RESOURCE_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
new file mode 100644
index 00000000000..f454ec77656
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -0,0 +1,845 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "pipe/p_screen.h"
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_screen.h"
+
+#include "nouveau_vp3_video.h"
+
+#include "nv_object.xml.h"
+#include <errno.h>
+
+#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
+# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
+#endif
+
+/* affected by LOCAL_WARPS_LOG_ALLOC / LOCAL_WARPS_NO_CLAMP */
+#define LOCAL_WARPS_ALLOC 32
+/* affected by STACK_WARPS_LOG_ALLOC / STACK_WARPS_NO_CLAMP */
+#define STACK_WARPS_ALLOC 32
+
+#define THREADS_IN_WARP 32
+
+#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))
+
+static boolean
+nv50_screen_is_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned bindings)
+{
+ if (sample_count > 8)
+ return FALSE;
+ if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
+ return FALSE;
+ if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128)
+ return FALSE;
+
+ if (!util_format_is_supported(format, bindings))
+ return FALSE;
+
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+
+ /* transfers & shared are always supported */
+ bindings &= ~(PIPE_BIND_TRANSFER_READ |
+ PIPE_BIND_TRANSFER_WRITE |
+ PIPE_BIND_SHARED);
+
+ return (nv50_format_table[format].usage & bindings) == bindings;
+}
+
+static int
+nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+ const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
+
+ switch (param) {
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 64;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return 14;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return 12;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 14;
+ case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+ return 512;
+ case PIPE_CAP_MIN_TEXEL_OFFSET:
+ return -8;
+ case PIPE_CAP_MAX_TEXEL_OFFSET:
+ return 7;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ case PIPE_CAP_SCALED_RESOLVE:
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+ return 65536;
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ return nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS;
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ return 0;
+ case PIPE_CAP_CUBE_MAP_ARRAY:
+ return 0;
+ /*
+ return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS;
+ */
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_SM3:
+ return 1;
+ case PIPE_CAP_GLSL_FEATURE_LEVEL:
+ return 140;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 8;
+ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+ return 1;
+ case PIPE_CAP_QUERY_TIMESTAMP:
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ return 4;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+ return 64;
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ return (class_3d >= NVA0_3D_CLASS) ? 1 : 0;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ return 1;
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return 0;
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return 0;
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ case PIPE_CAP_TEXTURE_BARRIER:
+ case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ case PIPE_CAP_START_INSTANCE:
+ return 1;
+ case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+ return 0; /* state trackers will know better */
+ case PIPE_CAP_USER_CONSTANT_BUFFERS:
+ case PIPE_CAP_USER_INDEX_BUFFERS:
+ case PIPE_CAP_USER_VERTEX_BUFFERS:
+ return 1;
+ case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+ return 256;
+ case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ return 1; /* 256 for binding as RT, but that's not possible in GL */
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
+ case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_TGSI_TEXCOORD:
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ return 0;
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ return 1;
+ case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+ return 0;
+ case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+ return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
+ case PIPE_CAP_ENDIANNESS:
+ return PIPE_ENDIAN_LITTLE;
+ default:
+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static int
+nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
+ enum pipe_shader_cap param)
+{
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_GEOMETRY:
+ case PIPE_SHADER_FRAGMENT:
+ break;
+ default:
+ return 0;
+ }
+
+ switch (param) {
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return 16384;
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ return 4;
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ if (shader == PIPE_SHADER_VERTEX)
+ return 32;
+ return 0x300 / 16;
+ case PIPE_SHADER_CAP_MAX_CONSTS:
+ return 65536 / 16;
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ return NV50_MAX_PIPE_CONSTBUFS;
+ case PIPE_SHADER_CAP_MAX_ADDRS:
+ return 1;
+ case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+ return shader != PIPE_SHADER_FRAGMENT;
+ case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_PREDS:
+ return 0;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return nv50_screen(pscreen)->max_tls_space / ONE_TEMP_SIZE;
+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+ return 1;
+ case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+ return 0;
+ case PIPE_SHADER_CAP_SUBROUTINES:
+ return 0; /* please inline, or provide function declarations */
+ case PIPE_SHADER_CAP_INTEGERS:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+ return 32;
+ default:
+ NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
+{
+ switch (param) {
+ case PIPE_CAPF_MAX_LINE_WIDTH:
+ case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+ return 10.0f;
+ case PIPE_CAPF_MAX_POINT_WIDTH:
+ case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+ return 64.0f;
+ case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+ return 16.0f;
+ case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+ return 4.0f;
+ default:
+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+ return 0.0f;
+ }
+}
+
+static void
+nv50_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+
+ if (screen->base.fence.current) {
+ nouveau_fence_wait(screen->base.fence.current);
+ nouveau_fence_ref (NULL, &screen->base.fence.current);
+ }
+ if (screen->base.pushbuf)
+ screen->base.pushbuf->user_priv = NULL;
+
+ if (screen->blitter)
+ nv50_blitter_destroy(screen);
+
+ nouveau_bo_ref(NULL, &screen->code);
+ nouveau_bo_ref(NULL, &screen->tls_bo);
+ nouveau_bo_ref(NULL, &screen->stack_bo);
+ nouveau_bo_ref(NULL, &screen->txc);
+ nouveau_bo_ref(NULL, &screen->uniforms);
+ nouveau_bo_ref(NULL, &screen->fence.bo);
+
+ nouveau_heap_destroy(&screen->vp_code_heap);
+ nouveau_heap_destroy(&screen->gp_code_heap);
+ nouveau_heap_destroy(&screen->fp_code_heap);
+
+ FREE(screen->tic.entries);
+
+ nouveau_object_del(&screen->tesla);
+ nouveau_object_del(&screen->eng2d);
+ nouveau_object_del(&screen->m2mf);
+ nouveau_object_del(&screen->sync);
+
+ nouveau_screen_fini(&screen->base);
+
+ FREE(screen);
+}
+
+static void
+nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
+{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+ struct nouveau_pushbuf *push = screen->base.pushbuf;
+
+ /* we need to do it after possible flush in MARK_RING */
+ *sequence = ++screen->base.fence.sequence;
+
+ PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
+ PUSH_DATAh(push, screen->fence.bo->offset);
+ PUSH_DATA (push, screen->fence.bo->offset);
+ PUSH_DATA (push, *sequence);
+ PUSH_DATA (push, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 |
+ NV50_3D_QUERY_GET_UNK4 |
+ NV50_3D_QUERY_GET_UNIT_CROP |
+ NV50_3D_QUERY_GET_TYPE_QUERY |
+ NV50_3D_QUERY_GET_QUERY_SELECT_ZERO |
+ NV50_3D_QUERY_GET_SHORT);
+}
+
+static u32
+nv50_screen_fence_update(struct pipe_screen *pscreen)
+{
+ return nv50_screen(pscreen)->fence.map[0];
+}
+
+static void
+nv50_screen_init_hwctx(struct nv50_screen *screen)
+{
+ struct nouveau_pushbuf *push = screen->base.pushbuf;
+ struct nv04_fifo *fifo;
+ unsigned i;
+
+ fifo = (struct nv04_fifo *)screen->base.channel->data;
+
+ BEGIN_NV04(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->m2mf->handle);
+ BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_DMA_NOTIFY), 3);
+ PUSH_DATA (push, screen->sync->handle);
+ PUSH_DATA (push, fifo->vram);
+ PUSH_DATA (push, fifo->vram);
+
+ BEGIN_NV04(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->eng2d->handle);
+ BEGIN_NV04(push, NV50_2D(DMA_NOTIFY), 4);
+ PUSH_DATA (push, screen->sync->handle);
+ PUSH_DATA (push, fifo->vram);
+ PUSH_DATA (push, fifo->vram);
+ PUSH_DATA (push, fifo->vram);
+ BEGIN_NV04(push, NV50_2D(OPERATION), 1);
+ PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY);
+ BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_2D(COLOR_KEY_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, SUBC_2D(0x0888), 1);
+ PUSH_DATA (push, 1);
+
+ BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->tesla->handle);
+
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+
+ BEGIN_NV04(push, NV50_3D(DMA_NOTIFY), 1);
+ PUSH_DATA (push, screen->sync->handle);
+ BEGIN_NV04(push, NV50_3D(DMA_ZETA), 11);
+ for (i = 0; i < 11; ++i)
+ PUSH_DATA(push, fifo->vram);
+ BEGIN_NV04(push, NV50_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN);
+ for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i)
+ PUSH_DATA(push, fifo->vram);
+
+ BEGIN_NV04(push, NV50_3D(REG_MODE), 1);
+ PUSH_DATA (push, NV50_3D_REG_MODE_STRIPED);
+ BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);
+ PUSH_DATA (push, 0xf);
+
+ if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
+ BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);
+ PUSH_DATA (push, 0x18);
+ }
+
+ BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, 1);
+
+ BEGIN_NV04(push, NV50_3D(CSAA_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
+ PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1);
+ BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(LINE_LAST_PIXEL), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1);
+ PUSH_DATA (push, 1);
+
+ if (screen->tesla->oclass >= NVA0_3D_CLASS) {
+ BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
+ PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
+ }
+
+ BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(WINDOW_OFFSET_X), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(ZCULL_REGION), 1);
+ PUSH_DATA (push, 0x3f);
+
+ BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));
+ PUSH_DATA (push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));
+
+ BEGIN_NV04(push, NV50_3D(FP_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));
+ PUSH_DATA (push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));
+
+ BEGIN_NV04(push, NV50_3D(GP_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));
+ PUSH_DATA (push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));
+
+ BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->tls_bo->offset);
+ PUSH_DATA (push, screen->tls_bo->offset);
+ PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));
+
+ BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->stack_bo->offset);
+ PUSH_DATA (push, screen->stack_bo->offset);
+ PUSH_DATA (push, 4);
+
+ BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->uniforms->offset + (0 << 16));
+ PUSH_DATA (push, screen->uniforms->offset + (0 << 16));
+ PUSH_DATA (push, (NV50_CB_PVP << 16) | 0x0000);
+
+ BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->uniforms->offset + (1 << 16));
+ PUSH_DATA (push, screen->uniforms->offset + (1 << 16));
+ PUSH_DATA (push, (NV50_CB_PGP << 16) | 0x0000);
+
+ BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->uniforms->offset + (2 << 16));
+ PUSH_DATA (push, screen->uniforms->offset + (2 << 16));
+ PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000);
+
+ BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->uniforms->offset + (3 << 16));
+ PUSH_DATA (push, screen->uniforms->offset + (3 << 16));
+ PUSH_DATA (push, (NV50_CB_AUX << 16) | 0x0200);
+
+ BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3);
+ PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf01);
+ PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21);
+ PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31);
+
+ /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */
+ BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
+ PUSH_DATA (push, ((1 << 9) << 6) | NV50_CB_AUX);
+ BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + (1 << 9));
+ PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + (1 << 9));
+
+ /* max TIC (bits 4:8) & TSC bindings, per program type */
+ for (i = 0; i < 3; ++i) {
+ BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1);
+ PUSH_DATA (push, 0x54);
+ }
+
+ BEGIN_NV04(push, NV50_3D(TIC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset);
+ PUSH_DATA (push, screen->txc->offset);
+ PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
+
+ BEGIN_NV04(push, NV50_3D(TSC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset + 65536);
+ PUSH_DATA (push, screen->txc->offset + 65536);
+ PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
+
+ BEGIN_NV04(push, NV50_3D(LINKED_TSC), 1);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NV04(push, NV50_3D(CLIP_RECTS_EN), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(CLIP_RECTS_MODE), 1);
+ PUSH_DATA (push, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY);
+ BEGIN_NV04(push, NV50_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
+ for (i = 0; i < 8 * 2; ++i)
+ PUSH_DATA(push, 0);
+ BEGIN_NV04(push, NV50_3D(CLIPID_ENABLE), 1);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(0)), 2);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 1.0f);
+
+ BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+#ifdef NV50_SCISSORS_CLIPPING
+ PUSH_DATA (push, 0x0000);
+#else
+ PUSH_DATA (push, 0x1080);
+#endif
+
+ BEGIN_NV04(push, NV50_3D(CLEAR_FLAGS), 1);
+ PUSH_DATA (push, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT);
+
+ /* We use scissors instead of exact view volume clipping,
+ * so they're always enabled.
+ */
+ BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(0)), 3);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 8192 << 16);
+ PUSH_DATA (push, 8192 << 16);
+
+ BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_3D(POINT_RASTER_RULES), 1);
+ PUSH_DATA (push, NV50_3D_POINT_RASTER_RULES_OGL);
+ BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1);
+ PUSH_DATA (push, 0x11111111);
+ BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);
+ PUSH_DATA (push, 1);
+
+ PUSH_KICK (push);
+}
+
+static int nv50_tls_alloc(struct nv50_screen *screen, unsigned tls_space,
+ uint64_t *tls_size)
+{
+ struct nouveau_device *dev = screen->base.device;
+ int ret;
+
+ screen->cur_tls_space = util_next_power_of_two(tls_space / ONE_TEMP_SIZE) *
+ ONE_TEMP_SIZE;
+ if (nouveau_mesa_debug)
+ debug_printf("allocating space for %u temps\n",
+ util_next_power_of_two(tls_space / ONE_TEMP_SIZE));
+ *tls_size = screen->cur_tls_space * util_next_power_of_two(screen->TPs) *
+ screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP;
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
+ *tls_size, NULL, &screen->tls_bo);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)
+{
+ struct nouveau_pushbuf *push = screen->base.pushbuf;
+ int ret;
+ uint64_t tls_size;
+
+ if (tls_space < screen->cur_tls_space)
+ return 0;
+ if (tls_space > screen->max_tls_space) {
+ /* fixable by limiting number of warps (LOCAL_WARPS_LOG_ALLOC /
+ * LOCAL_WARPS_NO_CLAMP) */
+ NOUVEAU_ERR("Unsupported number of temporaries (%u > %u). Fixable if someone cares.\n",
+ (unsigned)(tls_space / ONE_TEMP_SIZE),
+ (unsigned)(screen->max_tls_space / ONE_TEMP_SIZE));
+ return -ENOMEM;
+ }
+
+ nouveau_bo_ref(NULL, &screen->tls_bo);
+ ret = nv50_tls_alloc(screen, tls_space, &tls_size);
+ if (ret)
+ return ret;
+
+ BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->tls_bo->offset);
+ PUSH_DATA (push, screen->tls_bo->offset);
+ PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));
+
+ return 1;
+}
+
+struct pipe_screen *
+nv50_screen_create(struct nouveau_device *dev)
+{
+ struct nv50_screen *screen;
+ struct pipe_screen *pscreen;
+ struct nouveau_object *chan;
+ uint64_t value;
+ uint32_t tesla_class;
+ unsigned stack_size;
+ int ret;
+
+ screen = CALLOC_STRUCT(nv50_screen);
+ if (!screen)
+ return NULL;
+ pscreen = &screen->base.base;
+
+ ret = nouveau_screen_init(&screen->base, dev);
+ if (ret) {
+ NOUVEAU_ERR("nouveau_screen_init failed: %d\n", ret);
+ goto fail;
+ }
+
+ /* TODO: Prevent FIFO prefetch before transfer of index buffers and
+ * admit them to VRAM.
+ */
+ screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_VERTEX_BUFFER;
+ screen->base.sysmem_bindings |=
+ PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
+
+ screen->base.pushbuf->user_priv = screen;
+ screen->base.pushbuf->rsvd_kick = 5;
+
+ chan = screen->base.channel;
+
+ pscreen->destroy = nv50_screen_destroy;
+ pscreen->context_create = nv50_create;
+ pscreen->is_format_supported = nv50_screen_is_format_supported;
+ pscreen->get_param = nv50_screen_get_param;
+ pscreen->get_shader_param = nv50_screen_get_shader_param;
+ pscreen->get_paramf = nv50_screen_get_paramf;
+
+ nv50_screen_init_resource_functions(pscreen);
+
+ if (screen->base.device->chipset < 0x84 ||
+ debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) {
+ /* PMPEG */
+ nouveau_screen_init_vdec(&screen->base);
+ } else if (screen->base.device->chipset < 0x98 ||
+ screen->base.device->chipset == 0xa0) {
+ /* VP2 */
+ screen->base.base.get_video_param = nv84_screen_get_video_param;
+ screen->base.base.is_video_format_supported = nv84_screen_video_supported;
+ } else {
+ /* VP3/4 */
+ screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param;
+ screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
+ }
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
+ NULL, &screen->fence.bo);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate fence bo: %d\n", ret);
+ goto fail;
+ }
+
+ nouveau_bo_map(screen->fence.bo, 0, NULL);
+ screen->fence.map = screen->fence.bo->map;
+ screen->base.fence.emit = nv50_screen_fence_emit;
+ screen->base.fence.update = nv50_screen_fence_update;
+
+ ret = nouveau_object_new(chan, 0xbeef0301, NOUVEAU_NOTIFIER_CLASS,
+ &(struct nv04_notify){ .length = 32 },
+ sizeof(struct nv04_notify), &screen->sync);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate notifier: %d\n", ret);
+ goto fail;
+ }
+
+ ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS,
+ NULL, 0, &screen->m2mf);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate PGRAPH context for M2MF: %d\n", ret);
+ goto fail;
+ }
+
+ ret = nouveau_object_new(chan, 0xbeef502d, NV50_2D_CLASS,
+ NULL, 0, &screen->eng2d);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate PGRAPH context for 2D: %d\n", ret);
+ goto fail;
+ }
+
+ switch (dev->chipset & 0xf0) {
+ case 0x50:
+ tesla_class = NV50_3D_CLASS;
+ break;
+ case 0x80:
+ case 0x90:
+ tesla_class = NV84_3D_CLASS;
+ break;
+ case 0xa0:
+ switch (dev->chipset) {
+ case 0xa0:
+ case 0xaa:
+ case 0xac:
+ tesla_class = NVA0_3D_CLASS;
+ break;
+ case 0xaf:
+ tesla_class = NVAF_3D_CLASS;
+ break;
+ default:
+ tesla_class = NVA3_3D_CLASS;
+ break;
+ }
+ break;
+ default:
+ NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", dev->chipset);
+ goto fail;
+ }
+ screen->base.class_3d = tesla_class;
+
+ ret = nouveau_object_new(chan, 0xbeef5097, tesla_class,
+ NULL, 0, &screen->tesla);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate PGRAPH context for 3D: %d\n", ret);
+ goto fail;
+ }
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
+ 3 << NV50_CODE_BO_SIZE_LOG2, NULL, &screen->code);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate code bo: %d\n", ret);
+ goto fail;
+ }
+
+ nouveau_heap_init(&screen->vp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
+ nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
+ nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
+
+ nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
+
+ screen->TPs = util_bitcount(value & 0xffff);
+ screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
+
+ stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
+ STACK_WARPS_ALLOC * 64 * 8;
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, NULL,
+ &screen->stack_bo);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate stack bo: %d\n", ret);
+ goto fail;
+ }
+
+ uint64_t size_of_one_temp = util_next_power_of_two(screen->TPs) *
+ screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP *
+ ONE_TEMP_SIZE;
+ screen->max_tls_space = dev->vram_size / size_of_one_temp * ONE_TEMP_SIZE;
+ screen->max_tls_space /= 2; /* half of vram */
+
+ /* hw can address max 64 KiB */
+ screen->max_tls_space = MIN2(screen->max_tls_space, 64 << 10);
+
+ uint64_t tls_size;
+ unsigned tls_space = 4/*temps*/ * ONE_TEMP_SIZE;
+ ret = nv50_tls_alloc(screen, tls_space, &tls_size);
+ if (ret)
+ goto fail;
+
+ if (nouveau_mesa_debug)
+ debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n",
+ screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 4 << 16, NULL,
+ &screen->uniforms);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate uniforms bo: %d\n", ret);
+ goto fail;
+ }
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 3 << 16, NULL,
+ &screen->txc);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate TIC/TSC bo: %d\n", ret);
+ goto fail;
+ }
+
+ screen->tic.entries = CALLOC(4096, sizeof(void *));
+ screen->tsc.entries = screen->tic.entries + 2048;
+
+ if (!nv50_blitter_create(screen))
+ goto fail;
+
+ nv50_screen_init_hwctx(screen);
+
+ nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+
+ return pscreen;
+
+fail:
+ nv50_screen_destroy(pscreen);
+ return NULL;
+}
+
+int
+nv50_screen_tic_alloc(struct nv50_screen *screen, void *entry)
+{
+ int i = screen->tic.next;
+
+ while (screen->tic.lock[i / 32] & (1 << (i % 32)))
+ i = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);
+
+ screen->tic.next = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);
+
+ if (screen->tic.entries[i])
+ nv50_tic_entry(screen->tic.entries[i])->id = -1;
+
+ screen->tic.entries[i] = entry;
+ return i;
+}
+
+int
+nv50_screen_tsc_alloc(struct nv50_screen *screen, void *entry)
+{
+ int i = screen->tsc.next;
+
+ while (screen->tsc.lock[i / 32] & (1 << (i % 32)))
+ i = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);
+
+ screen->tsc.next = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);
+
+ if (screen->tsc.entries[i])
+ nv50_tsc_entry(screen->tsc.entries[i])->id = -1;
+
+ screen->tsc.entries[i] = entry;
+ return i;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
new file mode 100644
index 00000000000..091a3921a4b
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -0,0 +1,153 @@
+#ifndef __NV50_SCREEN_H__
+#define __NV50_SCREEN_H__
+
+#include "nouveau_screen.h"
+#include "nouveau_fence.h"
+#include "nouveau_mm.h"
+#include "nouveau_heap.h"
+
+#include "nv50/nv50_winsys.h"
+#include "nv50/nv50_stateobj.h"
+
+#define NV50_TIC_MAX_ENTRIES 2048
+#define NV50_TSC_MAX_ENTRIES 2048
+
+/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */
+#define NV50_MAX_PIPE_CONSTBUFS 14
+
+struct nv50_context;
+
+#define NV50_CODE_BO_SIZE_LOG2 19
+
+#define NV50_SCREEN_RESIDENT_BO_COUNT 5
+
+struct nv50_blitter;
+
+struct nv50_screen {
+ struct nouveau_screen base;
+
+ struct nv50_context *cur_ctx;
+
+ struct nouveau_bo *code;
+ struct nouveau_bo *uniforms;
+ struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
+ struct nouveau_bo *stack_bo;
+ struct nouveau_bo *tls_bo;
+
+ unsigned TPs;
+ unsigned MPsInTP;
+ unsigned max_tls_space;
+ unsigned cur_tls_space;
+
+ struct nouveau_heap *vp_code_heap;
+ struct nouveau_heap *gp_code_heap;
+ struct nouveau_heap *fp_code_heap;
+
+ struct nv50_blitter *blitter;
+
+ struct {
+ void **entries;
+ int next;
+ uint32_t lock[NV50_TIC_MAX_ENTRIES / 32];
+ } tic;
+
+ struct {
+ void **entries;
+ int next;
+ uint32_t lock[NV50_TSC_MAX_ENTRIES / 32];
+ } tsc;
+
+ struct {
+ uint32_t *map;
+ struct nouveau_bo *bo;
+ } fence;
+
+ struct nouveau_object *sync;
+
+ struct nouveau_object *tesla;
+ struct nouveau_object *eng2d;
+ struct nouveau_object *m2mf;
+};
+
+static INLINE struct nv50_screen *
+nv50_screen(struct pipe_screen *screen)
+{
+ return (struct nv50_screen *)screen;
+}
+
+boolean nv50_blitter_create(struct nv50_screen *);
+void nv50_blitter_destroy(struct nv50_screen *);
+
+int nv50_screen_tic_alloc(struct nv50_screen *, void *);
+int nv50_screen_tsc_alloc(struct nv50_screen *, void *);
+
+static INLINE void
+nv50_resource_fence(struct nv04_resource *res, uint32_t flags)
+{
+ struct nv50_screen *screen = nv50_screen(res->base.screen);
+
+ if (res->mm) {
+ nouveau_fence_ref(screen->base.fence.current, &res->fence);
+ if (flags & NOUVEAU_BO_WR)
+ nouveau_fence_ref(screen->base.fence.current, &res->fence_wr);
+ }
+}
+
+static INLINE void
+nv50_resource_validate(struct nv04_resource *res, uint32_t flags)
+{
+ if (likely(res->bo)) {
+ if (flags & NOUVEAU_BO_WR)
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING |
+ NOUVEAU_BUFFER_STATUS_DIRTY;
+ if (flags & NOUVEAU_BO_RD)
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ nv50_resource_fence(res, flags);
+ }
+}
+
+struct nv50_format {
+ uint32_t rt;
+ uint32_t tic;
+ uint32_t vtx;
+ uint32_t usage;
+};
+
+extern const struct nv50_format nv50_format_table[];
+
+static INLINE void
+nv50_screen_tic_unlock(struct nv50_screen *screen, struct nv50_tic_entry *tic)
+{
+ if (tic->id >= 0)
+ screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
+}
+
+static INLINE void
+nv50_screen_tsc_unlock(struct nv50_screen *screen, struct nv50_tsc_entry *tsc)
+{
+ if (tsc->id >= 0)
+ screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
+}
+
+static INLINE void
+nv50_screen_tic_free(struct nv50_screen *screen, struct nv50_tic_entry *tic)
+{
+ if (tic->id >= 0) {
+ screen->tic.entries[tic->id] = NULL;
+ screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
+ }
+}
+
+static INLINE void
+nv50_screen_tsc_free(struct nv50_screen *screen, struct nv50_tsc_entry *tsc)
+{
+ if (tsc->id >= 0) {
+ screen->tsc.entries[tsc->id] = NULL;
+ screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
+ }
+}
+
+extern int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
new file mode 100644
index 00000000000..9144fc48d95
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -0,0 +1,623 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+
+#include "nv50/nv50_context.h"
+
+void
+nv50_constbufs_validate(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ unsigned s;
+
+ for (s = 0; s < 3; ++s) {
+ unsigned p;
+
+ if (s == PIPE_SHADER_FRAGMENT)
+ p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT;
+ else
+ if (s == PIPE_SHADER_GEOMETRY)
+ p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY;
+ else
+ p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX;
+
+ while (nv50->constbuf_dirty[s]) {
+ const int i = ffs(nv50->constbuf_dirty[s]) - 1;
+ nv50->constbuf_dirty[s] &= ~(1 << i);
+
+ if (nv50->constbuf[s][i].user) {
+ const unsigned b = NV50_CB_PVP + s;
+ unsigned start = 0;
+ unsigned words = nv50->constbuf[s][0].size / 4;
+ if (i) {
+ NOUVEAU_ERR("user constbufs only supported in slot 0\n");
+ continue;
+ }
+ if (!nv50->state.uniform_buffer_bound[s]) {
+ nv50->state.uniform_buffer_bound[s] = TRUE;
+ BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
+ PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
+ }
+ while (words) {
+ unsigned nr;
+
+ if (!PUSH_SPACE(push, 16))
+ break;
+ nr = PUSH_AVAIL(push);
+ assert(nr >= 16);
+ nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN);
+
+ BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
+ PUSH_DATA (push, (start << 8) | b);
+ BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
+ PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr);
+
+ start += nr;
+ words -= nr;
+ }
+ } else {
+ struct nv04_resource *res =
+ nv04_resource(nv50->constbuf[s][i].u.buf);
+ if (res) {
+ /* TODO: allocate persistent bindings */
+ const unsigned b = s * 16 + i;
+
+ assert(nouveau_resource_mapped_by_gpu(&res->base));
+
+ BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset);
+ PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset);
+ PUSH_DATA (push, (b << 16) |
+ (nv50->constbuf[s][i].size & 0xffff));
+ BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
+ PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
+
+ BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD);
+ } else {
+ BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
+ PUSH_DATA (push, (i << 8) | p | 0);
+ }
+ if (i == 0)
+ nv50->state.uniform_buffer_bound[s] = FALSE;
+ }
+ }
+ }
+}
+
+static boolean
+nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
+{
+ if (!prog->translated) {
+ prog->translated = nv50_program_translate(
+ prog, nv50->screen->base.device->chipset);
+ if (!prog->translated)
+ return FALSE;
+ } else
+ if (prog->mem)
+ return TRUE;
+
+ return nv50_program_upload_code(nv50, prog);
+}
+
+static INLINE void
+nv50_program_update_context_state(struct nv50_context *nv50,
+ struct nv50_program *prog, int stage)
+{
+ const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
+
+ if (prog && prog->tls_space) {
+ if (nv50->state.new_tls_space)
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
+ if (!nv50->state.tls_required || nv50->state.new_tls_space)
+ BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo);
+ nv50->state.new_tls_space = FALSE;
+ nv50->state.tls_required |= 1 << stage;
+ } else {
+ if (nv50->state.tls_required == (1 << stage))
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
+ nv50->state.tls_required &= ~(1 << stage);
+ }
+}
+
+void
+nv50_vertprog_validate(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_program *vp = nv50->vertprog;
+
+ if (!nv50_program_validate(nv50, vp))
+ return;
+ nv50_program_update_context_state(nv50, vp, 0);
+
+ BEGIN_NV04(push, NV50_3D(VP_ATTR_EN(0)), 2);
+ PUSH_DATA (push, vp->vp.attrs[0]);
+ PUSH_DATA (push, vp->vp.attrs[1]);
+ BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_RESULT), 1);
+ PUSH_DATA (push, vp->max_out);
+ BEGIN_NV04(push, NV50_3D(VP_REG_ALLOC_TEMP), 1);
+ PUSH_DATA (push, vp->max_gpr);
+ BEGIN_NV04(push, NV50_3D(VP_START_ID), 1);
+ PUSH_DATA (push, vp->code_base);
+}
+
+void
+nv50_fragprog_validate(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_program *fp = nv50->fragprog;
+
+ if (!nv50_program_validate(nv50, fp))
+ return;
+ nv50_program_update_context_state(nv50, fp, 1);
+
+ BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1);
+ PUSH_DATA (push, fp->max_gpr);
+ BEGIN_NV04(push, NV50_3D(FP_RESULT_COUNT), 1);
+ PUSH_DATA (push, fp->max_out);
+ BEGIN_NV04(push, NV50_3D(FP_CONTROL), 1);
+ PUSH_DATA (push, fp->fp.flags[0]);
+ BEGIN_NV04(push, NV50_3D(FP_CTRL_UNK196C), 1);
+ PUSH_DATA (push, fp->fp.flags[1]);
+ BEGIN_NV04(push, NV50_3D(FP_START_ID), 1);
+ PUSH_DATA (push, fp->code_base);
+}
+
+void
+nv50_gmtyprog_validate(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_program *gp = nv50->gmtyprog;
+
+ if (gp) {
+ BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_TEMP), 1);
+ PUSH_DATA (push, gp->max_gpr);
+ BEGIN_NV04(push, NV50_3D(GP_REG_ALLOC_RESULT), 1);
+ PUSH_DATA (push, gp->max_out);
+ BEGIN_NV04(push, NV50_3D(GP_OUTPUT_PRIMITIVE_TYPE), 1);
+ PUSH_DATA (push, gp->gp.prim_type);
+ BEGIN_NV04(push, NV50_3D(GP_VERTEX_OUTPUT_COUNT), 1);
+ PUSH_DATA (push, gp->gp.vert_count);
+ BEGIN_NV04(push, NV50_3D(GP_START_ID), 1);
+ PUSH_DATA (push, gp->code_base);
+
+ nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */
+ }
+ nv50_program_update_context_state(nv50, gp, 2);
+
+ /* GP_ENABLE is updated in linkage validation */
+}
+
+static void
+nv50_sprite_coords_validate(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ uint32_t pntc[8], mode;
+ struct nv50_program *fp = nv50->fragprog;
+ unsigned i, c;
+ unsigned m = (nv50->state.interpolant_ctrl >> 8) & 0xff;
+
+ if (!nv50->rast->pipe.point_quad_rasterization) {
+ if (nv50->state.point_sprite) {
+ BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
+ for (i = 0; i < 8; ++i)
+ PUSH_DATA(push, 0);
+
+ nv50->state.point_sprite = FALSE;
+ }
+ return;
+ } else {
+ nv50->state.point_sprite = TRUE;
+ }
+
+ memset(pntc, 0, sizeof(pntc));
+
+ for (i = 0; i < fp->in_nr; i++) {
+ unsigned n = util_bitcount(fp->in[i].mask);
+
+ if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {
+ m += n;
+ continue;
+ }
+ if (!(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[i].si))) {
+ m += n;
+ continue;
+ }
+
+ for (c = 0; c < 4; ++c) {
+ if (fp->in[i].mask & (1 << c)) {
+ pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
+ ++m;
+ }
+ }
+ }
+
+ if (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
+ mode = 0x00;
+ else
+ mode = 0x10;
+
+ BEGIN_NV04(push, NV50_3D(POINT_SPRITE_CTRL), 1);
+ PUSH_DATA (push, mode);
+
+ BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
+ PUSH_DATAp(push, pntc, 8);
+}
+
+/* Validate state derived from shaders and the rasterizer cso. */
+void
+nv50_validate_derived_rs(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ uint32_t color, psize;
+
+ nv50_sprite_coords_validate(nv50);
+
+ if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) {
+ nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard;
+ BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
+ PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard);
+ }
+
+ if (nv50->dirty & NV50_NEW_FRAGPROG)
+ return;
+ psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
+ color = nv50->state.semantic_color & ~NV50_3D_SEMANTIC_COLOR_CLMP_EN;
+
+ if (nv50->rast->pipe.clamp_vertex_color)
+ color |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;
+
+ if (color != nv50->state.semantic_color) {
+ nv50->state.semantic_color = color;
+ BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 1);
+ PUSH_DATA (push, color);
+ }
+
+ if (nv50->rast->pipe.point_size_per_vertex)
+ psize |= NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
+
+ if (psize != nv50->state.semantic_psize) {
+ nv50->state.semantic_psize = psize;
+ BEGIN_NV04(push, NV50_3D(SEMANTIC_PTSZ), 1);
+ PUSH_DATA (push, psize);
+ }
+}
+
+static int
+nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4],
+ struct nv50_varying *in, struct nv50_varying *out)
+{
+ int c;
+ uint8_t mv = out->mask, mf = in->mask, oid = out->hw;
+
+ for (c = 0; c < 4; ++c) {
+ if (mf & 1) {
+ if (in->linear)
+ lin[mid / 32] |= 1 << (mid % 32);
+ if (mv & 1)
+ map[mid] = oid;
+ else
+ if (c == 3)
+ map[mid] |= 1;
+ ++mid;
+ }
+
+ oid += mv & 1;
+ mf >>= 1;
+ mv >>= 1;
+ }
+
+ return mid;
+}
+
+void
+nv50_fp_linkage_validate(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog;
+ struct nv50_program *fp = nv50->fragprog;
+ struct nv50_varying dummy;
+ int i, n, c, m;
+ uint32_t primid = 0;
+ uint32_t psiz = 0x000;
+ uint32_t interp = fp->fp.interp;
+ uint32_t colors = fp->fp.colors;
+ uint32_t lin[4];
+ uint8_t map[64];
+ uint8_t so_map[64];
+
+ if (!(nv50->dirty & (NV50_NEW_VERTPROG |
+ NV50_NEW_FRAGPROG |
+ NV50_NEW_GMTYPROG))) {
+ uint8_t bfc, ffc;
+ ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK);
+ bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK)
+ >> 8;
+ if (nv50->rast->pipe.light_twoside == ((ffc == bfc) ? 0 : 1))
+ return;
+ }
+
+ memset(lin, 0x00, sizeof(lin));
+
+ /* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx
+ * or is it the first byte ?
+ */
+ memset(map, nv50->gmtyprog ? 0x80 : 0x40, sizeof(map));
+
+ dummy.mask = 0xf; /* map all components of HPOS */
+ dummy.linear = 0;
+ m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
+
+ for (c = 0; c < vp->vp.clpd_nr; ++c)
+ map[m++] = vp->vp.clpd[c / 4] + (c % 4);
+
+ colors |= m << 8; /* adjust BFC0 id */
+
+ dummy.mask = 0x0;
+
+ /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */
+ if (nv50->rast->pipe.light_twoside) {
+ for (i = 0; i < 2; ++i) {
+ n = vp->vp.bfc[i];
+ if (fp->vp.bfc[i] >= fp->in_nr)
+ continue;
+ m = nv50_vec4_map(map, m, lin, &fp->in[fp->vp.bfc[i]],
+ (n < vp->out_nr) ? &vp->out[n] : &dummy);
+ }
+ }
+ colors += m - 4; /* adjust FFC0 id */
+ interp |= m << 8; /* set map id where 'normal' FP inputs start */
+
+ for (i = 0; i < fp->in_nr; ++i) {
+ for (n = 0; n < vp->out_nr; ++n)
+ if (vp->out[n].sn == fp->in[i].sn &&
+ vp->out[n].si == fp->in[i].si)
+ break;
+ m = nv50_vec4_map(map, m, lin,
+ &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
+ }
+
+ /* PrimitiveID either is replaced by the system value, or
+ * written by the geometry shader into an output register
+ */
+ if (fp->gp.primid < 0x80) {
+ primid = m;
+ map[m++] = vp->gp.primid;
+ }
+
+ if (nv50->rast->pipe.point_size_per_vertex) {
+ psiz = (m << 4) | 1;
+ map[m++] = vp->vp.psiz;
+ }
+
+ if (nv50->rast->pipe.clamp_vertex_color)
+ colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;
+
+ if (unlikely(vp->so)) {
+ /* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP
+ * gets written.
+ *
+ * TODO:
+ * Inverting vp->so->map (output -> offset) would probably speed this up.
+ */
+ memset(so_map, 0, sizeof(so_map));
+ for (i = 0; i < vp->so->map_size; ++i) {
+ if (vp->so->map[i] == 0xff)
+ continue;
+ for (c = 0; c < m; ++c)
+ if (map[c] == vp->so->map[i] && !so_map[c])
+ break;
+ if (c == m) {
+ c = m;
+ map[m++] = vp->so->map[i];
+ }
+ so_map[c] = 0x80 | i;
+ }
+ for (c = m; c & 3; ++c)
+ so_map[c] = 0;
+ }
+
+ n = (m + 3) / 4;
+ assert(m <= 64);
+
+ if (unlikely(nv50->gmtyprog)) {
+ BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP_SIZE), 1);
+ PUSH_DATA (push, m);
+ BEGIN_NV04(push, NV50_3D(GP_RESULT_MAP(0)), n);
+ PUSH_DATAp(push, map, n);
+ } else {
+ BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);
+ PUSH_DATA (push, vp->vp.attrs[2]);
+
+ BEGIN_NV04(push, NV50_3D(SEMANTIC_PRIM_ID), 1);
+ PUSH_DATA (push, primid);
+
+ BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);
+ PUSH_DATA (push, m);
+ BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
+ PUSH_DATAp(push, map, n);
+ }
+
+ BEGIN_NV04(push, NV50_3D(SEMANTIC_COLOR), 4);
+ PUSH_DATA (push, colors);
+ PUSH_DATA (push, (vp->vp.clpd_nr << 8) | 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, psiz);
+
+ BEGIN_NV04(push, NV50_3D(FP_INTERPOLANT_CTRL), 1);
+ PUSH_DATA (push, interp);
+
+ nv50->state.interpolant_ctrl = interp;
+
+ nv50->state.semantic_color = colors;
+ nv50->state.semantic_psize = psiz;
+
+ BEGIN_NV04(push, NV50_3D(NOPERSPECTIVE_BITMAP(0)), 4);
+ PUSH_DATAp(push, lin, 4);
+
+ BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1);
+ PUSH_DATA (push, nv50->gmtyprog ? 1 : 0);
+
+ if (vp->so) {
+ BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n);
+ PUSH_DATAp(push, so_map, n);
+ }
+}
+
+static int
+nv50_vp_gp_mapping(uint8_t *map, int m,
+ struct nv50_program *vp, struct nv50_program *gp)
+{
+ int i, j, c;
+
+ for (i = 0; i < gp->in_nr; ++i) {
+ uint8_t oid = 0, mv = 0, mg = gp->in[i].mask;
+
+ for (j = 0; j < vp->out_nr; ++j) {
+ if (vp->out[j].sn == gp->in[i].sn &&
+ vp->out[j].si == gp->in[i].si) {
+ mv = vp->out[j].mask;
+ oid = vp->out[j].hw;
+ break;
+ }
+ }
+
+ for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
+ if (mg & mv & 1)
+ map[m++] = oid;
+ else
+ if (mg & 1)
+ map[m++] = (c == 3) ? 0x41 : 0x40;
+ oid += mv & 1;
+ }
+ }
+ return m;
+}
+
+void
+nv50_gp_linkage_validate(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_program *vp = nv50->vertprog;
+ struct nv50_program *gp = nv50->gmtyprog;
+ int m = 0;
+ int n;
+ uint8_t map[64];
+
+ if (!gp)
+ return;
+ memset(map, 0, sizeof(map));
+
+ m = nv50_vp_gp_mapping(map, m, vp, gp);
+
+ n = (m + 3) / 4;
+
+ BEGIN_NV04(push, NV50_3D(VP_GP_BUILTIN_ATTR_EN), 1);
+ PUSH_DATA (push, vp->vp.attrs[2] | gp->vp.attrs[2]);
+
+ BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP_SIZE), 1);
+ PUSH_DATA (push, m);
+ BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
+ PUSH_DATAp(push, map, n);
+}
+
+void
+nv50_stream_output_validate(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_stream_output_state *so;
+ uint32_t ctrl;
+ unsigned i;
+ unsigned prims = ~0;
+
+ so = nv50->gmtyprog ? nv50->gmtyprog->so : nv50->vertprog->so;
+
+ BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ if (!so || !nv50->num_so_targets) {
+ if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
+ BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
+ PUSH_DATA (push, 0);
+ }
+ BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
+ PUSH_DATA (push, 1);
+ return;
+ }
+
+ /* previous TFB needs to complete */
+ if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
+ BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ ctrl = so->ctrl;
+ if (nv50->screen->base.class_3d >= NVA0_3D_CLASS)
+ ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET;
+
+ BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
+ PUSH_DATA (push, ctrl);
+
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);
+
+ for (i = 0; i < nv50->num_so_targets; ++i) {
+ struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
+ struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
+
+ const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;
+
+ if (n == 4 && !targ->clean)
+ nv84_query_fifo_wait(push, targ->pq);
+ BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
+ PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
+ PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
+ PUSH_DATA (push, so->num_attribs[i]);
+ if (n == 4) {
+ PUSH_DATA(push, targ->pipe.buffer_size);
+
+ BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
+ if (!targ->clean) {
+ assert(targ->pq);
+ nv50_query_pushbuf_submit(push, targ->pq, 0x4);
+ } else {
+ PUSH_DATA(push, 0);
+ targ->clean = FALSE;
+ }
+ } else {
+ const unsigned limit = targ->pipe.buffer_size /
+ (so->stride[i] * nv50->state.prim_size);
+ prims = MIN2(prims, limit);
+ }
+ BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
+ }
+ if (prims != ~0) {
+ BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
+ PUSH_DATA (push, prims);
+ }
+ BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
+ PUSH_DATA (push, 1);
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
new file mode 100644
index 00000000000..7dceb51c19e
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -0,0 +1,1110 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_helpers.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
+#include "util/u_format_srgb.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nv50/nv50_stateobj.h"
+#include "nv50/nv50_context.h"
+
+#include "nv50/nv50_3d.xml.h"
+#include "nv50/nv50_texture.xml.h"
+
+#include "nouveau_gldefs.h"
+
+/* Caveats:
+ * ! pipe_sampler_state.normalized_coords is ignored - rectangle textures will
+ * use non-normalized coordinates, everything else won't
+ * (The relevant bit is in the TIC entry and not the TSC entry.)
+ *
+ * ! pipe_sampler_state.seamless_cube_map is ignored - seamless filtering is
+ * always activated on NVA0 +
+ * (Give me the global bit, otherwise it's not worth the CPU work.)
+ *
+ * ! pipe_sampler_state.border_color is not swizzled according to the texture
+ * swizzle in pipe_sampler_view
+ * (This will be ugly with indirect independent texture/sampler access,
+ * we'd have to emulate the logic in the shader. GL doesn't have that,
+ * D3D doesn't have swizzle, if we knew what we were implementing we'd be
+ * good.)
+ *
+ * ! pipe_rasterizer_state.line_last_pixel is ignored - it is never drawn
+ *
+ * ! pipe_rasterizer_state.flatshade_first also applies to QUADS
+ * (There's a GL query for that, forcing an exception is just ridiculous.)
+ *
+ * ! pipe_rasterizer_state.half_pixel_center is ignored - pixel centers
+ * are always at half integer coordinates and the top-left rule applies
+ * (There does not seem to be a hardware switch for this.)
+ *
+ * ! pipe_rasterizer_state.sprite_coord_enable is masked with 0xff on NVC0
+ * (The hardware only has 8 slots meant for TexCoord and we have to assign
+ * in advance to maintain elegant separate shader objects.)
+ */
+
+static INLINE uint32_t
+nv50_colormask(unsigned mask)
+{
+ uint32_t ret = 0;
+
+ if (mask & PIPE_MASK_R)
+ ret |= 0x0001;
+ if (mask & PIPE_MASK_G)
+ ret |= 0x0010;
+ if (mask & PIPE_MASK_B)
+ ret |= 0x0100;
+ if (mask & PIPE_MASK_A)
+ ret |= 0x1000;
+
+ return ret;
+}
+
+#define NV50_BLEND_FACTOR_CASE(a, b) \
+ case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b
+
+static INLINE uint32_t
+nv50_blend_fac(unsigned factor)
+{
+ switch (factor) {
+ NV50_BLEND_FACTOR_CASE(ONE, ONE);
+ NV50_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR);
+ NV50_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA);
+ NV50_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA);
+ NV50_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR);
+ NV50_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE);
+ NV50_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR);
+ NV50_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA);
+ NV50_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR);
+ NV50_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA);
+ NV50_BLEND_FACTOR_CASE(ZERO, ZERO);
+ NV50_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR);
+ NV50_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA);
+ NV50_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA);
+ NV50_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR);
+ NV50_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR);
+ NV50_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA);
+ NV50_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR);
+ NV50_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA);
+ default:
+ return NV50_3D_BLEND_FACTOR_ZERO;
+ }
+}
+
+static void *
+nv50_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj);
+ int i;
+ boolean emit_common_func = cso->rt[0].blend_enable;
+ uint32_t ms;
+
+ if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
+ SB_BEGIN_3D(so, BLEND_INDEPENDENT, 1);
+ SB_DATA (so, cso->independent_blend_enable);
+ }
+
+ so->pipe = *cso;
+
+ SB_BEGIN_3D(so, COLOR_MASK_COMMON, 1);
+ SB_DATA (so, !cso->independent_blend_enable);
+
+ SB_BEGIN_3D(so, BLEND_ENABLE_COMMON, 1);
+ SB_DATA (so, !cso->independent_blend_enable);
+
+ if (cso->independent_blend_enable) {
+ SB_BEGIN_3D(so, BLEND_ENABLE(0), 8);
+ for (i = 0; i < 8; ++i) {
+ SB_DATA(so, cso->rt[i].blend_enable);
+ if (cso->rt[i].blend_enable)
+ emit_common_func = TRUE;
+ }
+
+ if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
+ emit_common_func = FALSE;
+
+ for (i = 0; i < 8; ++i) {
+ if (!cso->rt[i].blend_enable)
+ continue;
+ SB_BEGIN_3D_(so, NVA3_3D_IBLEND_EQUATION_RGB(i), 6);
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func));
+ SB_DATA (so, nv50_blend_fac(cso->rt[i].rgb_src_factor));
+ SB_DATA (so, nv50_blend_fac(cso->rt[i].rgb_dst_factor));
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func));
+ SB_DATA (so, nv50_blend_fac(cso->rt[i].alpha_src_factor));
+ SB_DATA (so, nv50_blend_fac(cso->rt[i].alpha_dst_factor));
+ }
+ }
+ } else {
+ SB_BEGIN_3D(so, BLEND_ENABLE(0), 1);
+ SB_DATA (so, cso->rt[0].blend_enable);
+ }
+
+ if (emit_common_func) {
+ SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5);
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
+ SB_DATA (so, nv50_blend_fac(cso->rt[0].rgb_src_factor));
+ SB_DATA (so, nv50_blend_fac(cso->rt[0].rgb_dst_factor));
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[0].alpha_func));
+ SB_DATA (so, nv50_blend_fac(cso->rt[0].alpha_src_factor));
+ SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1);
+ SB_DATA (so, nv50_blend_fac(cso->rt[0].alpha_dst_factor));
+ }
+
+ if (cso->logicop_enable) {
+ SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_logicop_func(cso->logicop_func));
+ } else {
+ SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 1);
+ SB_DATA (so, 0);
+ }
+
+ if (cso->independent_blend_enable) {
+ SB_BEGIN_3D(so, COLOR_MASK(0), 8);
+ for (i = 0; i < 8; ++i)
+ SB_DATA(so, nv50_colormask(cso->rt[i].colormask));
+ } else {
+ SB_BEGIN_3D(so, COLOR_MASK(0), 1);
+ SB_DATA (so, nv50_colormask(cso->rt[0].colormask));
+ }
+
+ ms = 0;
+ if (cso->alpha_to_coverage)
+ ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
+ if (cso->alpha_to_one)
+ ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
+
+ SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1);
+ SB_DATA (so, ms);
+
+ assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
+ return so;
+}
+
+static void
+nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->blend = hwcso;
+ nv50->dirty |= NV50_NEW_BLEND;
+}
+
+static void
+nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+/* NOTE: ignoring line_last_pixel, using FALSE (set on screen init) */
+static void *
+nv50_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nv50_rasterizer_stateobj *so;
+ uint32_t reg;
+
+ so = CALLOC_STRUCT(nv50_rasterizer_stateobj);
+ if (!so)
+ return NULL;
+ so->pipe = *cso;
+
+#ifndef NV50_SCISSORS_CLIPPING
+ SB_BEGIN_3D(so, SCISSOR_ENABLE(0), 1);
+ SB_DATA (so, cso->scissor);
+#endif
+
+ SB_BEGIN_3D(so, SHADE_MODEL, 1);
+ SB_DATA (so, cso->flatshade ? NV50_3D_SHADE_MODEL_FLAT :
+ NV50_3D_SHADE_MODEL_SMOOTH);
+ SB_BEGIN_3D(so, PROVOKING_VERTEX_LAST, 1);
+ SB_DATA (so, !cso->flatshade_first);
+ SB_BEGIN_3D(so, VERTEX_TWO_SIDE_ENABLE, 1);
+ SB_DATA (so, cso->light_twoside);
+
+ SB_BEGIN_3D(so, FRAG_COLOR_CLAMP_EN, 1);
+ SB_DATA (so, cso->clamp_fragment_color ? 0x11111111 : 0x00000000);
+
+ SB_BEGIN_3D(so, MULTISAMPLE_ENABLE, 1);
+ SB_DATA (so, cso->multisample);
+
+ SB_BEGIN_3D(so, LINE_WIDTH, 1);
+ SB_DATA (so, fui(cso->line_width));
+ SB_BEGIN_3D(so, LINE_SMOOTH_ENABLE, 1);
+ SB_DATA (so, cso->line_smooth);
+
+ SB_BEGIN_3D(so, LINE_STIPPLE_ENABLE, 1);
+ if (cso->line_stipple_enable) {
+ SB_DATA (so, 1);
+ SB_BEGIN_3D(so, LINE_STIPPLE, 1);
+ SB_DATA (so, (cso->line_stipple_pattern << 8) |
+ cso->line_stipple_factor);
+ } else {
+ SB_DATA (so, 0);
+ }
+
+ if (!cso->point_size_per_vertex) {
+ SB_BEGIN_3D(so, POINT_SIZE, 1);
+ SB_DATA (so, fui(cso->point_size));
+ }
+ SB_BEGIN_3D(so, POINT_SPRITE_ENABLE, 1);
+ SB_DATA (so, cso->point_quad_rasterization);
+ SB_BEGIN_3D(so, POINT_SMOOTH_ENABLE, 1);
+ SB_DATA (so, cso->point_smooth);
+
+ SB_BEGIN_3D(so, POLYGON_MODE_FRONT, 3);
+ SB_DATA (so, nvgl_polygon_mode(cso->fill_front));
+ SB_DATA (so, nvgl_polygon_mode(cso->fill_back));
+ SB_DATA (so, cso->poly_smooth);
+
+ SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3);
+ SB_DATA (so, cso->cull_face != PIPE_FACE_NONE);
+ SB_DATA (so, cso->front_ccw ? NV50_3D_FRONT_FACE_CCW :
+ NV50_3D_FRONT_FACE_CW);
+ switch (cso->cull_face) {
+ case PIPE_FACE_FRONT_AND_BACK:
+ SB_DATA(so, NV50_3D_CULL_FACE_FRONT_AND_BACK);
+ break;
+ case PIPE_FACE_FRONT:
+ SB_DATA(so, NV50_3D_CULL_FACE_FRONT);
+ break;
+ case PIPE_FACE_BACK:
+ default:
+ SB_DATA(so, NV50_3D_CULL_FACE_BACK);
+ break;
+ }
+
+ SB_BEGIN_3D(so, POLYGON_STIPPLE_ENABLE, 1);
+ SB_DATA (so, cso->poly_stipple_enable);
+ SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3);
+ SB_DATA (so, cso->offset_point);
+ SB_DATA (so, cso->offset_line);
+ SB_DATA (so, cso->offset_tri);
+
+ if (cso->offset_point || cso->offset_line || cso->offset_tri) {
+ SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1);
+ SB_DATA (so, fui(cso->offset_scale));
+ SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1);
+ SB_DATA (so, fui(cso->offset_units * 2.0f));
+ SB_BEGIN_3D(so, POLYGON_OFFSET_CLAMP, 1);
+ SB_DATA (so, fui(cso->offset_clamp));
+ }
+
+ if (cso->depth_clip) {
+ reg = 0;
+ } else {
+ reg =
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR |
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR |
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1;
+ }
+#ifndef NV50_SCISSORS_CLIPPING
+ reg |=
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 |
+ NV50_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1;
+#endif
+ SB_BEGIN_3D(so, VIEW_VOLUME_CLIP_CTRL, 1);
+ SB_DATA (so, reg);
+
+ assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
+ return (void *)so;
+}
+
+static void
+nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->rast = hwcso;
+ nv50->dirty |= NV50_NEW_RASTERIZER;
+}
+
+static void
+nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nv50_zsa_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nv50_zsa_stateobj *so = CALLOC_STRUCT(nv50_zsa_stateobj);
+
+ so->pipe = *cso;
+
+ SB_BEGIN_3D(so, DEPTH_WRITE_ENABLE, 1);
+ SB_DATA (so, cso->depth.writemask);
+ SB_BEGIN_3D(so, DEPTH_TEST_ENABLE, 1);
+ if (cso->depth.enabled) {
+ SB_DATA (so, 1);
+ SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1);
+ SB_DATA (so, nvgl_comparison_op(cso->depth.func));
+ } else {
+ SB_DATA (so, 0);
+ }
+
+ if (cso->stencil[0].enabled) {
+ SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+ SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func));
+ SB_BEGIN_3D(so, STENCIL_FRONT_MASK, 2);
+ SB_DATA (so, cso->stencil[0].writemask);
+ SB_DATA (so, cso->stencil[0].valuemask);
+ } else {
+ SB_BEGIN_3D(so, STENCIL_ENABLE, 1);
+ SB_DATA (so, 0);
+ }
+
+ if (cso->stencil[1].enabled) {
+ assert(cso->stencil[0].enabled);
+ SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+ SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func));
+ SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2);
+ SB_DATA (so, cso->stencil[1].writemask);
+ SB_DATA (so, cso->stencil[1].valuemask);
+ } else {
+ SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 1);
+ SB_DATA (so, 0);
+ }
+
+ SB_BEGIN_3D(so, ALPHA_TEST_ENABLE, 1);
+ if (cso->alpha.enabled) {
+ SB_DATA (so, 1);
+ SB_BEGIN_3D(so, ALPHA_TEST_REF, 2);
+ SB_DATA (so, fui(cso->alpha.ref_value));
+ SB_DATA (so, nvgl_comparison_op(cso->alpha.func));
+ } else {
+ SB_DATA (so, 0);
+ }
+
+ assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
+ return (void *)so;
+}
+
+static void
+nv50_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->zsa = hwcso;
+ nv50->dirty |= NV50_NEW_ZSA;
+}
+
+static void
+nv50_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+/* ====================== SAMPLERS AND TEXTURES ================================
+ */
+
+#define NV50_TSC_WRAP_CASE(n) \
+ case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
+
+static INLINE unsigned
+nv50_tsc_wrap_mode(unsigned wrap)
+{
+ switch (wrap) {
+ NV50_TSC_WRAP_CASE(REPEAT);
+ NV50_TSC_WRAP_CASE(MIRROR_REPEAT);
+ NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE);
+ NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER);
+ NV50_TSC_WRAP_CASE(CLAMP);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP);
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ return NV50_TSC_WRAP_REPEAT;
+ }
+}
+
+void *
+nv50_sampler_state_create(struct pipe_context *pipe,
+ const struct pipe_sampler_state *cso)
+{
+ struct nv50_tsc_entry *so = MALLOC_STRUCT(nv50_tsc_entry);
+ float f[2];
+
+ so->id = -1;
+
+ so->tsc[0] = (0x00026000 |
+ (nv50_tsc_wrap_mode(cso->wrap_s) << 0) |
+ (nv50_tsc_wrap_mode(cso->wrap_t) << 3) |
+ (nv50_tsc_wrap_mode(cso->wrap_r) << 6));
+
+ switch (cso->mag_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ so->tsc[1] = NV50_TSC_1_MAGF_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ so->tsc[1] = NV50_TSC_1_MAGF_NEAREST;
+ break;
+ }
+
+ switch (cso->min_img_filter) {
+ case PIPE_TEX_FILTER_LINEAR:
+ so->tsc[1] |= NV50_TSC_1_MINF_LINEAR;
+ break;
+ case PIPE_TEX_FILTER_NEAREST:
+ default:
+ so->tsc[1] |= NV50_TSC_1_MINF_NEAREST;
+ break;
+ }
+
+ switch (cso->min_mip_filter) {
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR;
+ break;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_NONE:
+ default:
+ so->tsc[1] |= NV50_TSC_1_MIPF_NONE;
+ break;
+ }
+
+ if (nouveau_screen(pipe->screen)->class_3d >= NVE4_3D_CLASS) {
+ if (cso->seamless_cube_map)
+ so->tsc[1] |= NVE4_TSC_1_CUBE_SEAMLESS;
+ if (!cso->normalized_coords)
+ so->tsc[1] |= NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS;
+ }
+
+ if (cso->max_anisotropy >= 16)
+ so->tsc[0] |= (7 << 20);
+ else
+ if (cso->max_anisotropy >= 12)
+ so->tsc[0] |= (6 << 20);
+ else {
+ so->tsc[0] |= (cso->max_anisotropy >> 1) << 20;
+
+ if (cso->max_anisotropy >= 4)
+ so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35;
+ else
+ if (cso->max_anisotropy >= 2)
+ so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15;
+ }
+
+ if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ /* NOTE: must be deactivated for non-shadow textures */
+ so->tsc[0] |= (1 << 9);
+ so->tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
+ }
+
+ f[0] = CLAMP(cso->lod_bias, -16.0f, 15.0f);
+ so->tsc[1] |= ((int)(f[0] * 256.0f) & 0x1fff) << 12;
+
+ f[0] = CLAMP(cso->min_lod, 0.0f, 15.0f);
+ f[1] = CLAMP(cso->max_lod, 0.0f, 15.0f);
+ so->tsc[2] =
+ (((int)(f[1] * 256.0f) & 0xfff) << 12) | ((int)(f[0] * 256.0f) & 0xfff);
+
+ so->tsc[2] |=
+ util_format_linear_float_to_srgb_8unorm(cso->border_color.f[0]) << 24;
+ so->tsc[3] =
+ util_format_linear_float_to_srgb_8unorm(cso->border_color.f[1]) << 12;
+ so->tsc[3] |=
+ util_format_linear_float_to_srgb_8unorm(cso->border_color.f[2]) << 20;
+
+ so->tsc[4] = fui(cso->border_color.f[0]);
+ so->tsc[5] = fui(cso->border_color.f[1]);
+ so->tsc[6] = fui(cso->border_color.f[2]);
+ so->tsc[7] = fui(cso->border_color.f[3]);
+
+ return (void *)so;
+}
+
+static void
+nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ unsigned s, i;
+
+ for (s = 0; s < 3; ++s)
+ for (i = 0; i < nv50_context(pipe)->num_samplers[s]; ++i)
+ if (nv50_context(pipe)->samplers[s][i] == hwcso)
+ nv50_context(pipe)->samplers[s][i] = NULL;
+
+ nv50_screen_tsc_free(nv50_context(pipe)->screen, nv50_tsc_entry(hwcso));
+
+ FREE(hwcso);
+}
+
+static INLINE void
+nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s,
+ unsigned nr, void **hwcso)
+{
+ unsigned i;
+
+ for (i = 0; i < nr; ++i) {
+ struct nv50_tsc_entry *old = nv50->samplers[s][i];
+
+ nv50->samplers[s][i] = nv50_tsc_entry(hwcso[i]);
+ if (old)
+ nv50_screen_tsc_unlock(nv50->screen, old);
+ }
+ for (; i < nv50->num_samplers[s]; ++i)
+ if (nv50->samplers[s][i])
+ nv50_screen_tsc_unlock(nv50->screen, nv50->samplers[s][i]);
+
+ nv50->num_samplers[s] = nr;
+
+ nv50->dirty |= NV50_NEW_SAMPLERS;
+}
+
+static void
+nv50_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nv50_stage_sampler_states_bind(nv50_context(pipe), 0, nr, s);
+}
+
+static void
+nv50_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nv50_stage_sampler_states_bind(nv50_context(pipe), 2, nr, s);
+}
+
+static void
+nv50_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nv50_stage_sampler_states_bind(nv50_context(pipe), 1, nr, s);
+}
+
+/* NOTE: only called when not referenced anywhere, won't be bound */
+static void
+nv50_sampler_view_destroy(struct pipe_context *pipe,
+ struct pipe_sampler_view *view)
+{
+ pipe_resource_reference(&view->texture, NULL);
+
+ nv50_screen_tic_free(nv50_context(pipe)->screen, nv50_tic_entry(view));
+
+ FREE(nv50_tic_entry(view));
+}
+
+static INLINE void
+nv50_stage_set_sampler_views(struct nv50_context *nv50, int s,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ unsigned i;
+
+ for (i = 0; i < nr; ++i) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]);
+ if (old)
+ nv50_screen_tic_unlock(nv50->screen, old);
+
+ pipe_sampler_view_reference(&nv50->textures[s][i], views[i]);
+ }
+
+ for (i = nr; i < nv50->num_textures[s]; ++i) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]);
+ if (!old)
+ continue;
+ nv50_screen_tic_unlock(nv50->screen, old);
+
+ pipe_sampler_view_reference(&nv50->textures[s][i], NULL);
+ }
+
+ nv50->num_textures[s] = nr;
+
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+
+ nv50->dirty |= NV50_NEW_TEXTURES;
+}
+
+static void
+nv50_vp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nv50_stage_set_sampler_views(nv50_context(pipe), 0, nr, views);
+}
+
+static void
+nv50_fp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nv50_stage_set_sampler_views(nv50_context(pipe), 2, nr, views);
+}
+
+static void
+nv50_gp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nv50_stage_set_sampler_views(nv50_context(pipe), 1, nr, views);
+}
+
+/* ============================= SHADERS =======================================
+ */
+
+static void *
+nv50_sp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso, unsigned type)
+{
+ struct nv50_program *prog;
+
+ prog = CALLOC_STRUCT(nv50_program);
+ if (!prog)
+ return NULL;
+
+ prog->type = type;
+ prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ if (cso->stream_output.num_outputs)
+ prog->pipe.stream_output = cso->stream_output;
+
+ return (void *)prog;
+}
+
+static void
+nv50_sp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_program *prog = (struct nv50_program *)hwcso;
+
+ nv50_program_destroy(nv50_context(pipe), prog);
+
+ FREE((void *)prog->pipe.tokens);
+ FREE(prog);
+}
+
+static void *
+nv50_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nv50_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX);
+}
+
+static void
+nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->vertprog = hwcso;
+ nv50->dirty |= NV50_NEW_VERTPROG;
+}
+
+static void *
+nv50_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nv50_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT);
+}
+
+static void
+nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->fragprog = hwcso;
+ nv50->dirty |= NV50_NEW_FRAGPROG;
+}
+
+static void *
+nv50_gp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nv50_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY);
+}
+
+static void
+nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->gmtyprog = hwcso;
+ nv50->dirty |= NV50_NEW_GMTYPROG;
+}
+
+static void
+nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ struct pipe_constant_buffer *cb)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct pipe_resource *res = cb ? cb->buffer : NULL;
+ const unsigned s = nv50_context_shader_stage(shader);
+ const unsigned i = index;
+
+ if (shader == PIPE_SHADER_COMPUTE)
+ return;
+
+ if (nv50->constbuf[s][i].user)
+ nv50->constbuf[s][i].u.buf = NULL;
+ else
+ if (nv50->constbuf[s][i].u.buf)
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i));
+
+ pipe_resource_reference(&nv50->constbuf[s][i].u.buf, res);
+
+ nv50->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE;
+ if (nv50->constbuf[s][i].user) {
+ nv50->constbuf[s][i].u.data = cb->user_buffer;
+ nv50->constbuf[s][i].size = cb->buffer_size;
+ nv50->constbuf_valid[s] |= 1 << i;
+ } else
+ if (res) {
+ nv50->constbuf[s][i].offset = cb->buffer_offset;
+ nv50->constbuf[s][i].size = align(cb->buffer_size, 0x100);
+ nv50->constbuf_valid[s] |= 1 << i;
+ } else {
+ nv50->constbuf_valid[s] &= ~(1 << i);
+ }
+ nv50->constbuf_dirty[s] |= 1 << i;
+
+ nv50->dirty |= NV50_NEW_CONSTBUF;
+}
+
+/* =============================================================================
+ */
+
+static void
+nv50_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->blend_colour = *bcol;
+ nv50->dirty |= NV50_NEW_BLEND_COLOUR;
+}
+
+static void
+nv50_set_stencil_ref(struct pipe_context *pipe,
+ const struct pipe_stencil_ref *sr)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->stencil_ref = *sr;
+ nv50->dirty |= NV50_NEW_STENCIL_REF;
+}
+
+static void
+nv50_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ memcpy(nv50->clip.ucp, clip->ucp, sizeof(clip->ucp));
+
+ nv50->dirty |= NV50_NEW_CLIP;
+}
+
+static void
+nv50_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->sample_mask = sample_mask;
+ nv50->dirty |= NV50_NEW_SAMPLE_MASK;
+}
+
+
+static void
+nv50_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ unsigned i;
+
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+
+ for (i = 0; i < fb->nr_cbufs; ++i)
+ pipe_surface_reference(&nv50->framebuffer.cbufs[i], fb->cbufs[i]);
+ for (; i < nv50->framebuffer.nr_cbufs; ++i)
+ pipe_surface_reference(&nv50->framebuffer.cbufs[i], NULL);
+
+ nv50->framebuffer.nr_cbufs = fb->nr_cbufs;
+
+ nv50->framebuffer.width = fb->width;
+ nv50->framebuffer.height = fb->height;
+
+ pipe_surface_reference(&nv50->framebuffer.zsbuf, fb->zsbuf);
+
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+}
+
+static void
+nv50_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->stipple = *stipple;
+ nv50->dirty |= NV50_NEW_STIPPLE;
+}
+
+static void
+nv50_set_scissor_states(struct pipe_context *pipe,
+ unsigned start_slot,
+ unsigned num_scissors,
+ const struct pipe_scissor_state *scissor)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->scissor = *scissor;
+ nv50->dirty |= NV50_NEW_SCISSOR;
+}
+
+static void
+nv50_set_viewport_states(struct pipe_context *pipe,
+ unsigned start_slot,
+ unsigned num_viewports,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->viewport = *vpt;
+ nv50->dirty |= NV50_NEW_VIEWPORT;
+}
+
+static void
+nv50_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned start_slot, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ unsigned i;
+
+ util_set_vertex_buffers_count(nv50->vtxbuf, &nv50->num_vtxbufs, vb,
+ start_slot, count);
+
+ if (!vb) {
+ nv50->vbo_user &= ~(((1ull << count) - 1) << start_slot);
+ nv50->vbo_constant &= ~(((1ull << count) - 1) << start_slot);
+ return;
+ }
+
+ for (i = 0; i < count; ++i) {
+ unsigned dst_index = start_slot + i;
+
+ if (!vb[i].buffer && vb[i].user_buffer) {
+ nv50->vbo_user |= 1 << dst_index;
+ if (!vb[i].stride)
+ nv50->vbo_constant |= 1 << dst_index;
+ else
+ nv50->vbo_constant &= ~(1 << dst_index);
+ } else {
+ nv50->vbo_user &= ~(1 << dst_index);
+ nv50->vbo_constant &= ~(1 << dst_index);
+ }
+ }
+
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
+
+ nv50->dirty |= NV50_NEW_ARRAYS;
+}
+
+static void
+nv50_set_index_buffer(struct pipe_context *pipe,
+ const struct pipe_index_buffer *ib)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ if (nv50->idxbuf.buffer)
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
+
+ if (ib) {
+ pipe_resource_reference(&nv50->idxbuf.buffer, ib->buffer);
+ nv50->idxbuf.index_size = ib->index_size;
+ if (ib->buffer) {
+ nv50->idxbuf.offset = ib->offset;
+ BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(ib->buffer), RD);
+ } else {
+ nv50->idxbuf.user_buffer = ib->user_buffer;
+ }
+ } else {
+ pipe_resource_reference(&nv50->idxbuf.buffer, NULL);
+ }
+}
+
+static void
+nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->vertex = hwcso;
+ nv50->dirty |= NV50_NEW_VERTEX;
+}
+
+static struct pipe_stream_output_target *
+nv50_so_target_create(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size)
+{
+ struct nv50_so_target *targ = MALLOC_STRUCT(nv50_so_target);
+ if (!targ)
+ return NULL;
+
+ if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) {
+ targ->pq = pipe->create_query(pipe,
+ NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET);
+ if (!targ->pq) {
+ FREE(targ);
+ return NULL;
+ }
+ } else {
+ targ->pq = NULL;
+ }
+ targ->clean = TRUE;
+
+ targ->pipe.buffer_size = size;
+ targ->pipe.buffer_offset = offset;
+ targ->pipe.context = pipe;
+ targ->pipe.buffer = NULL;
+ pipe_resource_reference(&targ->pipe.buffer, res);
+ pipe_reference_init(&targ->pipe.reference, 1);
+
+ return &targ->pipe;
+}
+
+static void
+nv50_so_target_destroy(struct pipe_context *pipe,
+ struct pipe_stream_output_target *ptarg)
+{
+ struct nv50_so_target *targ = nv50_so_target(ptarg);
+ if (targ->pq)
+ pipe->destroy_query(pipe, targ->pq);
+ pipe_resource_reference(&targ->pipe.buffer, NULL);
+ FREE(targ);
+}
+
+static void
+nv50_set_stream_output_targets(struct pipe_context *pipe,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ unsigned append_mask)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ unsigned i;
+ boolean serialize = TRUE;
+ const boolean can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS;
+
+ assert(num_targets <= 4);
+
+ for (i = 0; i < num_targets; ++i) {
+ const boolean changed = nv50->so_target[i] != targets[i];
+ if (!changed && (append_mask & (1 << i)))
+ continue;
+ nv50->so_targets_dirty |= 1 << i;
+
+ if (can_resume && changed && nv50->so_target[i]) {
+ nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
+ serialize = FALSE;
+ }
+
+ if (targets[i] && !(append_mask & (1 << i)))
+ nv50_so_target(targets[i])->clean = TRUE;
+
+ pipe_so_target_reference(&nv50->so_target[i], targets[i]);
+ }
+ for (; i < nv50->num_so_targets; ++i) {
+ if (can_resume && nv50->so_target[i]) {
+ nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
+ serialize = FALSE;
+ }
+ pipe_so_target_reference(&nv50->so_target[i], NULL);
+ nv50->so_targets_dirty |= 1 << i;
+ }
+ nv50->num_so_targets = num_targets;
+
+ if (nv50->so_targets_dirty)
+ nv50->dirty |= NV50_NEW_STRMOUT;
+}
+
+void
+nv50_init_state_functions(struct nv50_context *nv50)
+{
+ struct pipe_context *pipe = &nv50->base.pipe;
+
+ pipe->create_blend_state = nv50_blend_state_create;
+ pipe->bind_blend_state = nv50_blend_state_bind;
+ pipe->delete_blend_state = nv50_blend_state_delete;
+
+ pipe->create_rasterizer_state = nv50_rasterizer_state_create;
+ pipe->bind_rasterizer_state = nv50_rasterizer_state_bind;
+ pipe->delete_rasterizer_state = nv50_rasterizer_state_delete;
+
+ pipe->create_depth_stencil_alpha_state = nv50_zsa_state_create;
+ pipe->bind_depth_stencil_alpha_state = nv50_zsa_state_bind;
+ pipe->delete_depth_stencil_alpha_state = nv50_zsa_state_delete;
+
+ pipe->create_sampler_state = nv50_sampler_state_create;
+ pipe->delete_sampler_state = nv50_sampler_state_delete;
+ pipe->bind_vertex_sampler_states = nv50_vp_sampler_states_bind;
+ pipe->bind_fragment_sampler_states = nv50_fp_sampler_states_bind;
+ pipe->bind_geometry_sampler_states = nv50_gp_sampler_states_bind;
+
+ pipe->create_sampler_view = nv50_create_sampler_view;
+ pipe->sampler_view_destroy = nv50_sampler_view_destroy;
+ pipe->set_vertex_sampler_views = nv50_vp_set_sampler_views;
+ pipe->set_fragment_sampler_views = nv50_fp_set_sampler_views;
+ pipe->set_geometry_sampler_views = nv50_gp_set_sampler_views;
+
+ pipe->create_vs_state = nv50_vp_state_create;
+ pipe->create_fs_state = nv50_fp_state_create;
+ pipe->create_gs_state = nv50_gp_state_create;
+ pipe->bind_vs_state = nv50_vp_state_bind;
+ pipe->bind_fs_state = nv50_fp_state_bind;
+ pipe->bind_gs_state = nv50_gp_state_bind;
+ pipe->delete_vs_state = nv50_sp_state_delete;
+ pipe->delete_fs_state = nv50_sp_state_delete;
+ pipe->delete_gs_state = nv50_sp_state_delete;
+
+ pipe->set_blend_color = nv50_set_blend_color;
+ pipe->set_stencil_ref = nv50_set_stencil_ref;
+ pipe->set_clip_state = nv50_set_clip_state;
+ pipe->set_sample_mask = nv50_set_sample_mask;
+ pipe->set_constant_buffer = nv50_set_constant_buffer;
+ pipe->set_framebuffer_state = nv50_set_framebuffer_state;
+ pipe->set_polygon_stipple = nv50_set_polygon_stipple;
+ pipe->set_scissor_states = nv50_set_scissor_states;
+ pipe->set_viewport_states = nv50_set_viewport_states;
+
+ pipe->create_vertex_elements_state = nv50_vertex_state_create;
+ pipe->delete_vertex_elements_state = nv50_vertex_state_delete;
+ pipe->bind_vertex_elements_state = nv50_vertex_state_bind;
+
+ pipe->set_vertex_buffers = nv50_set_vertex_buffers;
+ pipe->set_index_buffer = nv50_set_index_buffer;
+
+ pipe->create_stream_output_target = nv50_so_target_create;
+ pipe->stream_output_target_destroy = nv50_so_target_destroy;
+ pipe->set_stream_output_targets = nv50_set_stream_output_targets;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
new file mode 100644
index 00000000000..866829ca22d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -0,0 +1,414 @@
+
+#include "nv50/nv50_context.h"
+#include "os/os_time.h"
+
+static void
+nv50_validate_fb(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+ unsigned i;
+ unsigned ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1;
+ uint32_t array_size = 0xffff, array_mode = 0;
+
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+
+ BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs);
+ BEGIN_NV04(push, NV50_3D(SCREEN_SCISSOR_HORIZ), 2);
+ PUSH_DATA (push, fb->width << 16);
+ PUSH_DATA (push, fb->height << 16);
+
+ for (i = 0; i < fb->nr_cbufs; ++i) {
+ struct nv50_miptree *mt = nv50_miptree(fb->cbufs[i]->texture);
+ struct nv50_surface *sf = nv50_surface(fb->cbufs[i]);
+ struct nouveau_bo *bo = mt->base.bo;
+
+ array_size = MIN2(array_size, sf->depth);
+ if (mt->layout_3d)
+ array_mode = NV50_3D_RT_ARRAY_MODE_MODE_3D; /* 1 << 16 */
+
+ /* can't mix 3D with ARRAY or have RTs of different depth/array_size */
+ assert(mt->layout_3d || !array_mode || array_size == 1);
+
+ BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(i)), 5);
+ PUSH_DATAh(push, bo->offset + sf->offset);
+ PUSH_DATA (push, bo->offset + sf->offset);
+ PUSH_DATA (push, nv50_format_table[sf->base.format].rt);
+ if (likely(nouveau_bo_memtype(bo))) {
+ PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA (push, mt->layer_stride >> 2);
+ BEGIN_NV04(push, NV50_3D(RT_HORIZ(i)), 2);
+ PUSH_DATA (push, sf->width);
+ PUSH_DATA (push, sf->height);
+ BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
+ PUSH_DATA (push, array_mode | array_size);
+ } else {
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(RT_HORIZ(i)), 2);
+ PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | mt->level[0].pitch);
+ PUSH_DATA (push, sf->height);
+ BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
+ PUSH_DATA (push, 0);
+
+ assert(!fb->zsbuf);
+ assert(!mt->ms_mode);
+ }
+
+ ms_mode = mt->ms_mode;
+
+ if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
+ nv50->state.rt_serialize = TRUE;
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ mt->base.status &= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ /* only register for writing, otherwise we'd always serialize here */
+ BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR);
+ }
+
+ if (fb->zsbuf) {
+ struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
+ struct nv50_surface *sf = nv50_surface(fb->zsbuf);
+ struct nouveau_bo *bo = mt->base.bo;
+ int unk = mt->base.base.target == PIPE_TEXTURE_3D || sf->depth == 1;
+
+ BEGIN_NV04(push, NV50_3D(ZETA_ADDRESS_HIGH), 5);
+ PUSH_DATAh(push, bo->offset + sf->offset);
+ PUSH_DATA (push, bo->offset + sf->offset);
+ PUSH_DATA (push, nv50_format_table[fb->zsbuf->format].rt);
+ PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA (push, mt->layer_stride >> 2);
+ BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_3D(ZETA_HORIZ), 3);
+ PUSH_DATA (push, sf->width);
+ PUSH_DATA (push, sf->height);
+ PUSH_DATA (push, (unk << 16) | sf->depth);
+
+ ms_mode = mt->ms_mode;
+
+ if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
+ nv50->state.rt_serialize = TRUE;
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ mt->base.status &= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR);
+ } else {
+ BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
+ PUSH_DATA (push, ms_mode);
+
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2);
+ PUSH_DATA (push, fb->width << 16);
+ PUSH_DATA (push, fb->height << 16);
+}
+
+static void
+nv50_validate_blend_colour(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ BEGIN_NV04(push, NV50_3D(BLEND_COLOR(0)), 4);
+ PUSH_DATAf(push, nv50->blend_colour.color[0]);
+ PUSH_DATAf(push, nv50->blend_colour.color[1]);
+ PUSH_DATAf(push, nv50->blend_colour.color[2]);
+ PUSH_DATAf(push, nv50->blend_colour.color[3]);
+}
+
+static void
+nv50_validate_stencil_ref(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ BEGIN_NV04(push, NV50_3D(STENCIL_FRONT_FUNC_REF), 1);
+ PUSH_DATA (push, nv50->stencil_ref.ref_value[0]);
+ BEGIN_NV04(push, NV50_3D(STENCIL_BACK_FUNC_REF), 1);
+ PUSH_DATA (push, nv50->stencil_ref.ref_value[1]);
+}
+
+static void
+nv50_validate_stipple(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ unsigned i;
+
+ BEGIN_NV04(push, NV50_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
+ for (i = 0; i < 32; ++i)
+ PUSH_DATA(push, util_bswap32(nv50->stipple.stipple[i]));
+}
+
+static void
+nv50_validate_scissor(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct pipe_scissor_state *s = &nv50->scissor;
+#ifdef NV50_SCISSORS_CLIPPING
+ struct pipe_viewport_state *vp = &nv50->viewport;
+ int minx, maxx, miny, maxy;
+
+ if (!(nv50->dirty &
+ (NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | NV50_NEW_FRAMEBUFFER)) &&
+ nv50->state.scissor == nv50->rast->pipe.scissor)
+ return;
+ nv50->state.scissor = nv50->rast->pipe.scissor;
+
+ if (nv50->state.scissor) {
+ minx = s->minx;
+ maxx = s->maxx;
+ miny = s->miny;
+ maxy = s->maxy;
+ } else {
+ minx = 0;
+ maxx = nv50->framebuffer.width;
+ miny = 0;
+ maxy = nv50->framebuffer.height;
+ }
+
+ minx = MAX2(minx, (int)(vp->translate[0] - fabsf(vp->scale[0])));
+ maxx = MIN2(maxx, (int)(vp->translate[0] + fabsf(vp->scale[0])));
+ miny = MAX2(miny, (int)(vp->translate[1] - fabsf(vp->scale[1])));
+ maxy = MIN2(maxy, (int)(vp->translate[1] + fabsf(vp->scale[1])));
+
+ BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2);
+ PUSH_DATA (push, (maxx << 16) | minx);
+ PUSH_DATA (push, (maxy << 16) | miny);
+#else
+ BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2);
+ PUSH_DATA (push, (s->maxx << 16) | s->minx);
+ PUSH_DATA (push, (s->maxy << 16) | s->miny);
+#endif
+}
+
+static void
+nv50_validate_viewport(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ float zmin, zmax;
+
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSLATE_X(0)), 3);
+ PUSH_DATAf(push, nv50->viewport.translate[0]);
+ PUSH_DATAf(push, nv50->viewport.translate[1]);
+ PUSH_DATAf(push, nv50->viewport.translate[2]);
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_SCALE_X(0)), 3);
+ PUSH_DATAf(push, nv50->viewport.scale[0]);
+ PUSH_DATAf(push, nv50->viewport.scale[1]);
+ PUSH_DATAf(push, nv50->viewport.scale[2]);
+
+ zmin = nv50->viewport.translate[2] - fabsf(nv50->viewport.scale[2]);
+ zmax = nv50->viewport.translate[2] + fabsf(nv50->viewport.scale[2]);
+
+#ifdef NV50_SCISSORS_CLIPPING
+ BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(0)), 2);
+ PUSH_DATAf(push, zmin);
+ PUSH_DATAf(push, zmax);
+#endif
+}
+
+static INLINE void
+nv50_check_program_ucps(struct nv50_context *nv50,
+ struct nv50_program *vp, uint8_t mask)
+{
+ const unsigned n = util_logbase2(mask) + 1;
+
+ if (vp->vp.clpd_nr >= n)
+ return;
+ nv50_program_destroy(nv50, vp);
+
+ vp->vp.clpd_nr = n;
+ if (likely(vp == nv50->vertprog)) {
+ nv50->dirty |= NV50_NEW_VERTPROG;
+ nv50_vertprog_validate(nv50);
+ } else {
+ nv50->dirty |= NV50_NEW_GMTYPROG;
+ nv50_gmtyprog_validate(nv50);
+ }
+ nv50_fp_linkage_validate(nv50);
+}
+
+static void
+nv50_validate_clip(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_program *vp;
+ uint8_t clip_enable;
+
+ if (nv50->dirty & NV50_NEW_CLIP) {
+ BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
+ PUSH_DATA (push, (0 << 8) | NV50_CB_AUX);
+ BEGIN_NI04(push, NV50_3D(CB_DATA(0)), PIPE_MAX_CLIP_PLANES * 4);
+ PUSH_DATAp(push, &nv50->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
+ }
+
+ vp = nv50->gmtyprog;
+ if (likely(!vp))
+ vp = nv50->vertprog;
+
+ clip_enable = nv50->rast->pipe.clip_plane_enable;
+
+ BEGIN_NV04(push, NV50_3D(CLIP_DISTANCE_ENABLE), 1);
+ PUSH_DATA (push, clip_enable);
+
+ if (clip_enable)
+ nv50_check_program_ucps(nv50, vp, clip_enable);
+}
+
+static void
+nv50_validate_blend(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ PUSH_SPACE(push, nv50->blend->size);
+ PUSH_DATAp(push, nv50->blend->state, nv50->blend->size);
+}
+
+static void
+nv50_validate_zsa(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ PUSH_SPACE(push, nv50->zsa->size);
+ PUSH_DATAp(push, nv50->zsa->state, nv50->zsa->size);
+}
+
+static void
+nv50_validate_rasterizer(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ PUSH_SPACE(push, nv50->rast->size);
+ PUSH_DATAp(push, nv50->rast->state, nv50->rast->size);
+}
+
+static void
+nv50_validate_sample_mask(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ unsigned mask[4] =
+ {
+ nv50->sample_mask & 0xffff,
+ nv50->sample_mask & 0xffff,
+ nv50->sample_mask & 0xffff,
+ nv50->sample_mask & 0xffff
+ };
+
+ BEGIN_NV04(push, NV50_3D(MSAA_MASK(0)), 4);
+ PUSH_DATA (push, mask[0]);
+ PUSH_DATA (push, mask[1]);
+ PUSH_DATA (push, mask[2]);
+ PUSH_DATA (push, mask[3]);
+}
+
+static void
+nv50_switch_pipe_context(struct nv50_context *ctx_to)
+{
+ struct nv50_context *ctx_from = ctx_to->screen->cur_ctx;
+
+ if (ctx_from)
+ ctx_to->state = ctx_from->state;
+
+ ctx_to->dirty = ~0;
+
+ if (!ctx_to->vertex)
+ ctx_to->dirty &= ~(NV50_NEW_VERTEX | NV50_NEW_ARRAYS);
+
+ if (!ctx_to->vertprog)
+ ctx_to->dirty &= ~NV50_NEW_VERTPROG;
+ if (!ctx_to->fragprog)
+ ctx_to->dirty &= ~NV50_NEW_FRAGPROG;
+
+ if (!ctx_to->blend)
+ ctx_to->dirty &= ~NV50_NEW_BLEND;
+ if (!ctx_to->rast)
+#ifdef NV50_SCISSORS_CLIPPING
+ ctx_to->dirty &= ~(NV50_NEW_RASTERIZER | NV50_NEW_SCISSOR);
+#else
+ ctx_to->dirty &= ~NV50_NEW_RASTERIZER;
+#endif
+ if (!ctx_to->zsa)
+ ctx_to->dirty &= ~NV50_NEW_ZSA;
+
+ ctx_to->screen->cur_ctx = ctx_to;
+}
+
+static struct state_validate {
+ void (*func)(struct nv50_context *);
+ uint32_t states;
+} validate_list[] = {
+ { nv50_validate_fb, NV50_NEW_FRAMEBUFFER },
+ { nv50_validate_blend, NV50_NEW_BLEND },
+ { nv50_validate_zsa, NV50_NEW_ZSA },
+ { nv50_validate_sample_mask, NV50_NEW_SAMPLE_MASK },
+ { nv50_validate_rasterizer, NV50_NEW_RASTERIZER },
+ { nv50_validate_blend_colour, NV50_NEW_BLEND_COLOUR },
+ { nv50_validate_stencil_ref, NV50_NEW_STENCIL_REF },
+ { nv50_validate_stipple, NV50_NEW_STIPPLE },
+#ifdef NV50_SCISSORS_CLIPPING
+ { nv50_validate_scissor, NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT |
+ NV50_NEW_RASTERIZER |
+ NV50_NEW_FRAMEBUFFER },
+#else
+ { nv50_validate_scissor, NV50_NEW_SCISSOR },
+#endif
+ { nv50_validate_viewport, NV50_NEW_VIEWPORT },
+ { nv50_vertprog_validate, NV50_NEW_VERTPROG },
+ { nv50_gmtyprog_validate, NV50_NEW_GMTYPROG },
+ { nv50_fragprog_validate, NV50_NEW_FRAGPROG },
+ { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
+ NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER },
+ { nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG },
+ { nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
+ NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
+ { nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
+ NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
+ { nv50_constbufs_validate, NV50_NEW_CONSTBUF },
+ { nv50_validate_textures, NV50_NEW_TEXTURES },
+ { nv50_validate_samplers, NV50_NEW_SAMPLERS },
+ { nv50_stream_output_validate, NV50_NEW_STRMOUT |
+ NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
+ { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }
+};
+#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
+
+boolean
+nv50_state_validate(struct nv50_context *nv50, uint32_t mask, unsigned words)
+{
+ uint32_t state_mask;
+ int ret;
+ unsigned i;
+
+ if (nv50->screen->cur_ctx != nv50)
+ nv50_switch_pipe_context(nv50);
+
+ state_mask = nv50->dirty & mask;
+
+ if (state_mask) {
+ for (i = 0; i < validate_list_len; ++i) {
+ struct state_validate *validate = &validate_list[i];
+
+ if (state_mask & validate->states)
+ validate->func(nv50);
+ }
+ nv50->dirty &= ~state_mask;
+
+ if (nv50->state.rt_serialize) {
+ nv50->state.rt_serialize = FALSE;
+ BEGIN_NV04(nv50->base.pushbuf, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (nv50->base.pushbuf, 0);
+ }
+
+ nv50_bufctx_fence(nv50->bufctx_3d, FALSE);
+ }
+ nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_3d);
+ ret = nouveau_pushbuf_validate(nv50->base.pushbuf);
+
+ if (unlikely(nv50->state.flushed)) {
+ nv50->state.flushed = FALSE;
+ nv50_bufctx_fence(nv50->bufctx_3d, TRUE);
+ }
+ return !ret;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
new file mode 100644
index 00000000000..238951733cf
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
@@ -0,0 +1,78 @@
+
+#ifndef __NV50_STATEOBJ_H__
+#define __NV50_STATEOBJ_H__
+
+#include "pipe/p_state.h"
+
+#define NV50_SCISSORS_CLIPPING
+
+#define SB_BEGIN_3D(so, m, s) \
+ (so)->state[(so)->size++] = NV50_FIFO_PKHDR(NV50_3D(m), s)
+
+#define SB_BEGIN_3D_(so, m, s) \
+ (so)->state[(so)->size++] = NV50_FIFO_PKHDR(SUBC_3D(m), s)
+
+#define SB_DATA(so, u) (so)->state[(so)->size++] = (u)
+
+#include "nv50/nv50_stateobj_tex.h"
+
+struct nv50_blend_stateobj {
+ struct pipe_blend_state pipe;
+ int size;
+ uint32_t state[84]; // TODO: allocate less if !independent_blend_enable
+};
+
+struct nv50_rasterizer_stateobj {
+ struct pipe_rasterizer_state pipe;
+ int size;
+ uint32_t state[48];
+};
+
+struct nv50_zsa_stateobj {
+ struct pipe_depth_stencil_alpha_state pipe;
+ int size;
+ uint32_t state[29];
+};
+
+struct nv50_constbuf {
+ union {
+ struct pipe_resource *buf;
+ const uint8_t *data;
+ } u;
+ uint32_t size; /* max 65536 */
+ uint32_t offset;
+ boolean user; /* should only be TRUE if u.data is valid and non-NULL */
+};
+
+struct nv50_vertex_element {
+ struct pipe_vertex_element pipe;
+ uint32_t state;
+};
+
+struct nv50_vertex_stateobj {
+ uint32_t min_instance_div[PIPE_MAX_ATTRIBS];
+ uint16_t vb_access_size[PIPE_MAX_ATTRIBS];
+ struct translate *translate;
+ unsigned num_elements;
+ uint32_t instance_elts;
+ uint32_t instance_bufs;
+ boolean need_conversion;
+ unsigned vertex_size;
+ unsigned packet_vertex_limit;
+ struct nv50_vertex_element element[0];
+};
+
+struct nv50_so_target {
+ struct pipe_stream_output_target pipe;
+ struct pipe_query *pq;
+ unsigned stride;
+ boolean clean;
+};
+
+static INLINE struct nv50_so_target *
+nv50_so_target(struct pipe_stream_output_target *ptarg)
+{
+ return (struct nv50_so_target *)ptarg;
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
new file mode 100644
index 00000000000..99548cbdb42
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
@@ -0,0 +1,34 @@
+
+#ifndef __NV50_STATEOBJ_TEX_H__
+#define __NV50_STATEOBJ_TEX_H__
+
+#include "pipe/p_state.h"
+
+struct nv50_tsc_entry {
+ int id;
+ uint32_t tsc[8];
+};
+
+static INLINE struct nv50_tsc_entry *
+nv50_tsc_entry(void *hwcso)
+{
+ return (struct nv50_tsc_entry *)hwcso;
+}
+
+struct nv50_tic_entry {
+ struct pipe_sampler_view pipe;
+ int id;
+ uint32_t tic[8];
+};
+
+static INLINE struct nv50_tic_entry *
+nv50_tic_entry(struct pipe_sampler_view *view)
+{
+ return (struct nv50_tic_entry *)view;
+}
+
+extern void *
+nv50_sampler_state_create(struct pipe_context *,
+ const struct pipe_sampler_state *);
+
+#endif /* __NV50_STATEOBJ_TEX_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
new file mode 100644
index 00000000000..dcc1fce41c5
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -0,0 +1,1353 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include "pipe/p_defines.h"
+
+#include "util/u_inlines.h"
+#include "util/u_pack_color.h"
+#include "util/u_format.h"
+#include "util/u_surface.h"
+
+#include "tgsi/tgsi_ureg.h"
+
+#include "os/os_thread.h"
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_resource.h"
+
+#include "nv50/nv50_defs.xml.h"
+#include "nv50/nv50_texture.xml.h"
+
+/* these are used in nv50_blit.h */
+#define NV50_ENG2D_SUPPORTED_FORMATS 0xff0843e080608409ULL
+#define NV50_ENG2D_NOCONVERT_FORMATS 0x0008402000000000ULL
+#define NV50_ENG2D_LUMINANCE_FORMATS 0x0008402000000000ULL
+#define NV50_ENG2D_INTENSITY_FORMATS 0x0000000000000000ULL
+#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000608000ULL
+
+#define NOUVEAU_DRIVER 0x50
+#include "nv50/nv50_blit.h"
+
+static INLINE uint8_t
+nv50_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
+{
+ uint8_t id = nv50_format_table[format].rt;
+
+ /* Hardware values for color formats range from 0xc0 to 0xff,
+ * but the 2D engine doesn't support all of them.
+ */
+ if ((id >= 0xc0) && (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0))))
+ return id;
+ assert(dst_src_equal);
+
+ switch (util_format_get_blocksize(format)) {
+ case 1:
+ return NV50_SURFACE_FORMAT_R8_UNORM;
+ case 2:
+ return NV50_SURFACE_FORMAT_R16_UNORM;
+ case 4:
+ return NV50_SURFACE_FORMAT_BGRA8_UNORM;
+ default:
+ return 0;
+ }
+}
+
+static int
+nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst,
+ struct nv50_miptree *mt, unsigned level, unsigned layer,
+ enum pipe_format pformat, boolean dst_src_pformat_equal)
+{
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t width, height, depth;
+ uint32_t format;
+ uint32_t mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT;
+ uint32_t offset = mt->level[level].offset;
+
+ format = nv50_2d_format(pformat, dst, dst_src_pformat_equal);
+ if (!format) {
+ NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
+ util_format_name(pformat));
+ return 1;
+ }
+
+ width = u_minify(mt->base.base.width0, level) << mt->ms_x;
+ height = u_minify(mt->base.base.height0, level) << mt->ms_y;
+ depth = u_minify(mt->base.base.depth0, level);
+
+ offset = mt->level[level].offset;
+ if (!mt->layout_3d) {
+ offset += mt->layer_stride * layer;
+ depth = 1;
+ layer = 0;
+ } else
+ if (!dst) {
+ offset += nv50_mt_zslice_offset(mt, level, layer);
+ layer = 0;
+ }
+
+ if (!nouveau_bo_memtype(bo)) {
+ BEGIN_NV04(push, SUBC_2D(mthd), 2);
+ PUSH_DATA (push, format);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, SUBC_2D(mthd + 0x14), 5);
+ PUSH_DATA (push, mt->level[level].pitch);
+ PUSH_DATA (push, width);
+ PUSH_DATA (push, height);
+ PUSH_DATAh(push, bo->offset + offset);
+ PUSH_DATA (push, bo->offset + offset);
+ } else {
+ BEGIN_NV04(push, SUBC_2D(mthd), 5);
+ PUSH_DATA (push, format);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, mt->level[level].tile_mode);
+ PUSH_DATA (push, depth);
+ PUSH_DATA (push, layer);
+ BEGIN_NV04(push, SUBC_2D(mthd + 0x18), 4);
+ PUSH_DATA (push, width);
+ PUSH_DATA (push, height);
+ PUSH_DATAh(push, bo->offset + offset);
+ PUSH_DATA (push, bo->offset + offset);
+ }
+
+#if 0
+ if (dst) {
+ BEGIN_NV04(push, SUBC_2D(NV50_2D_CLIP_X), 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, width);
+ PUSH_DATA (push, height);
+ }
+#endif
+ return 0;
+}
+
+static int
+nv50_2d_texture_do_copy(struct nouveau_pushbuf *push,
+ struct nv50_miptree *dst, unsigned dst_level,
+ unsigned dx, unsigned dy, unsigned dz,
+ struct nv50_miptree *src, unsigned src_level,
+ unsigned sx, unsigned sy, unsigned sz,
+ unsigned w, unsigned h)
+{
+ const enum pipe_format dfmt = dst->base.base.format;
+ const enum pipe_format sfmt = src->base.base.format;
+ int ret;
+ boolean eqfmt = dfmt == sfmt;
+
+ if (!PUSH_SPACE(push, 2 * 16 + 32))
+ return PIPE_ERROR;
+
+ ret = nv50_2d_texture_set(push, 1, dst, dst_level, dz, dfmt, eqfmt);
+ if (ret)
+ return ret;
+
+ ret = nv50_2d_texture_set(push, 0, src, src_level, sz, sfmt, eqfmt);
+ if (ret)
+ return ret;
+
+ BEGIN_NV04(push, NV50_2D(BLIT_CONTROL), 1);
+ PUSH_DATA (push, NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE);
+ BEGIN_NV04(push, NV50_2D(BLIT_DST_X), 4);
+ PUSH_DATA (push, dx << dst->ms_x);
+ PUSH_DATA (push, dy << dst->ms_y);
+ PUSH_DATA (push, w << dst->ms_x);
+ PUSH_DATA (push, h << dst->ms_y);
+ BEGIN_NV04(push, NV50_2D(BLIT_DU_DX_FRACT), 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_2D(BLIT_SRC_X_FRACT), 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, sx << src->ms_x);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, sy << src->ms_y);
+
+ return 0;
+}
+
+static void
+nv50_resource_copy_region(struct pipe_context *pipe,
+ struct pipe_resource *dst, unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src, unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ int ret;
+ boolean m2mf;
+ unsigned dst_layer = dstz, src_layer = src_box->z;
+
+ if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
+ nouveau_copy_buffer(&nv50->base,
+ nv04_resource(dst), dstx,
+ nv04_resource(src), src_box->x, src_box->width);
+ return;
+ }
+
+ /* 0 and 1 are equal, only supporting 0/1, 2, 4 and 8 */
+ assert((src->nr_samples | 1) == (dst->nr_samples | 1));
+
+ m2mf = (src->format == dst->format) ||
+ (util_format_get_blocksizebits(src->format) ==
+ util_format_get_blocksizebits(dst->format));
+
+ nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+
+ if (m2mf) {
+ struct nv50_m2mf_rect drect, srect;
+ unsigned i;
+ unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
+ unsigned ny = util_format_get_nblocksy(src->format, src_box->height);
+
+ nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz);
+ nv50_m2mf_rect_setup(&srect, src, src_level,
+ src_box->x, src_box->y, src_box->z);
+
+ for (i = 0; i < src_box->depth; ++i) {
+ nv50_m2mf_transfer_rect(nv50, &drect, &srect, nx, ny);
+
+ if (nv50_miptree(dst)->layout_3d)
+ drect.z++;
+ else
+ drect.base += nv50_miptree(dst)->layer_stride;
+
+ if (nv50_miptree(src)->layout_3d)
+ srect.z++;
+ else
+ srect.base += nv50_miptree(src)->layer_stride;
+ }
+ return;
+ }
+
+ assert((src->format == dst->format) ||
+ (nv50_2d_src_format_faithful(src->format) &&
+ nv50_2d_dst_format_faithful(dst->format)));
+
+ BCTX_REFN(nv50->bufctx, 2D, nv04_resource(src), RD);
+ BCTX_REFN(nv50->bufctx, 2D, nv04_resource(dst), WR);
+ nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx);
+ nouveau_pushbuf_validate(nv50->base.pushbuf);
+
+ for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) {
+ ret = nv50_2d_texture_do_copy(nv50->base.pushbuf,
+ nv50_miptree(dst), dst_level,
+ dstx, dsty, dst_layer,
+ nv50_miptree(src), src_level,
+ src_box->x, src_box->y, src_layer,
+ src_box->width, src_box->height);
+ if (ret)
+ break;
+ }
+ nouveau_bufctx_reset(nv50->bufctx, NV50_BIND_2D);
+}
+
+static void
+nv50_clear_render_target(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ const union pipe_color_union *color,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_miptree *mt = nv50_miptree(dst->texture);
+ struct nv50_surface *sf = nv50_surface(dst);
+ struct nouveau_bo *bo = mt->base.bo;
+ unsigned z;
+
+ BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
+ PUSH_DATAf(push, color->f[0]);
+ PUSH_DATAf(push, color->f[1]);
+ PUSH_DATAf(push, color->f[2]);
+ PUSH_DATAf(push, color->f[3]);
+
+ if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
+ return;
+
+ PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
+
+ BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5);
+ PUSH_DATAh(push, bo->offset + sf->offset);
+ PUSH_DATA (push, bo->offset + sf->offset);
+ PUSH_DATA (push, nv50_format_table[dst->format].rt);
+ PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
+ if (nouveau_bo_memtype(bo))
+ PUSH_DATA(push, sf->width);
+ else
+ PUSH_DATA(push, NV50_3D_RT_HORIZ_LINEAR | mt->level[0].pitch);
+ PUSH_DATA (push, sf->height);
+ BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
+ PUSH_DATA (push, 1);
+
+ if (!nouveau_bo_memtype(bo)) {
+ BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
+
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2);
+ PUSH_DATA (push, (width << 16) | dstx);
+ PUSH_DATA (push, (height << 16) | dsty);
+
+ BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth);
+ for (z = 0; z < sf->depth; ++z) {
+ PUSH_DATA (push, 0x3c |
+ (z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
+ }
+
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+}
+
+static void
+nv50_clear_depth_stencil(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_miptree *mt = nv50_miptree(dst->texture);
+ struct nv50_surface *sf = nv50_surface(dst);
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t mode = 0;
+ unsigned z;
+
+ assert(nouveau_bo_memtype(bo)); /* ZETA cannot be linear */
+
+ if (clear_flags & PIPE_CLEAR_DEPTH) {
+ BEGIN_NV04(push, NV50_3D(CLEAR_DEPTH), 1);
+ PUSH_DATAf(push, depth);
+ mode |= NV50_3D_CLEAR_BUFFERS_Z;
+ }
+
+ if (clear_flags & PIPE_CLEAR_STENCIL) {
+ BEGIN_NV04(push, NV50_3D(CLEAR_STENCIL), 1);
+ PUSH_DATA (push, stencil & 0xff);
+ mode |= NV50_3D_CLEAR_BUFFERS_S;
+ }
+
+ if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
+ return;
+
+ PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
+
+ BEGIN_NV04(push, NV50_3D(ZETA_ADDRESS_HIGH), 5);
+ PUSH_DATAh(push, bo->offset + sf->offset);
+ PUSH_DATA (push, bo->offset + sf->offset);
+ PUSH_DATA (push, nv50_format_table[dst->format].rt);
+ PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_3D(ZETA_HORIZ), 3);
+ PUSH_DATA (push, sf->width);
+ PUSH_DATA (push, sf->height);
+ PUSH_DATA (push, (1 << 16) | 1);
+
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2);
+ PUSH_DATA (push, (width << 16) | dstx);
+ PUSH_DATA (push, (height << 16) | dsty);
+
+ BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), sf->depth);
+ for (z = 0; z < sf->depth; ++z) {
+ PUSH_DATA (push, mode |
+ (z << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
+ }
+
+ nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+}
+
+void
+nv50_clear(struct pipe_context *pipe, unsigned buffers,
+ const union pipe_color_union *color,
+ double depth, unsigned stencil)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+ unsigned i;
+ uint32_t mode = 0;
+
+ /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
+ if (!nv50_state_validate(nv50, NV50_NEW_FRAMEBUFFER, 9 + (fb->nr_cbufs * 2)))
+ return;
+
+ if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
+ BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
+ PUSH_DATAf(push, color->f[0]);
+ PUSH_DATAf(push, color->f[1]);
+ PUSH_DATAf(push, color->f[2]);
+ PUSH_DATAf(push, color->f[3]);
+ mode =
+ NV50_3D_CLEAR_BUFFERS_R | NV50_3D_CLEAR_BUFFERS_G |
+ NV50_3D_CLEAR_BUFFERS_B | NV50_3D_CLEAR_BUFFERS_A;
+ }
+
+ if (buffers & PIPE_CLEAR_DEPTH) {
+ BEGIN_NV04(push, NV50_3D(CLEAR_DEPTH), 1);
+ PUSH_DATA (push, fui(depth));
+ mode |= NV50_3D_CLEAR_BUFFERS_Z;
+ }
+
+ if (buffers & PIPE_CLEAR_STENCIL) {
+ BEGIN_NV04(push, NV50_3D(CLEAR_STENCIL), 1);
+ PUSH_DATA (push, stencil & 0xff);
+ mode |= NV50_3D_CLEAR_BUFFERS_S;
+ }
+
+ BEGIN_NV04(push, NV50_3D(CLEAR_BUFFERS), 1);
+ PUSH_DATA (push, mode);
+
+ for (i = 1; i < fb->nr_cbufs; i++) {
+ BEGIN_NV04(push, NV50_3D(CLEAR_BUFFERS), 1);
+ PUSH_DATA (push, (i << 6) | 0x3c);
+ }
+}
+
+
+/* =============================== BLIT CODE ===================================
+ */
+
+struct nv50_blitter
+{
+ struct nv50_program *fp[NV50_BLIT_MAX_TEXTURE_TYPES][NV50_BLIT_MODES];
+ struct nv50_program vp;
+
+ struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */
+
+ pipe_mutex mutex;
+};
+
+struct nv50_blitctx
+{
+ struct nv50_context *nv50;
+ struct nv50_program *fp;
+ uint8_t mode;
+ uint16_t color_mask;
+ uint8_t filter;
+ enum pipe_texture_target target;
+ struct {
+ struct pipe_framebuffer_state fb;
+ struct nv50_rasterizer_stateobj *rast;
+ struct nv50_program *vp;
+ struct nv50_program *gp;
+ struct nv50_program *fp;
+ unsigned num_textures[3];
+ unsigned num_samplers[3];
+ struct pipe_sampler_view *texture[2];
+ struct nv50_tsc_entry *sampler[2];
+ uint32_t dirty;
+ } saved;
+ struct nv50_rasterizer_stateobj rast;
+};
+
+static void
+nv50_blitter_make_vp(struct nv50_blitter *blit)
+{
+ static const uint32_t code[] =
+ {
+ 0x10000001, 0x0423c788, /* mov b32 o[0x00] s[0x00] */ /* HPOS.x */
+ 0x10000205, 0x0423c788, /* mov b32 o[0x04] s[0x04] */ /* HPOS.y */
+ 0x10000409, 0x0423c788, /* mov b32 o[0x08] s[0x08] */ /* TEXC.x */
+ 0x1000060d, 0x0423c788, /* mov b32 o[0x0c] s[0x0c] */ /* TEXC.y */
+ 0x10000811, 0x0423c789, /* mov b32 o[0x10] s[0x10] */ /* TEXC.z */
+ };
+
+ blit->vp.type = PIPE_SHADER_VERTEX;
+ blit->vp.translated = TRUE;
+ blit->vp.code = (uint32_t *)code; /* const_cast */
+ blit->vp.code_size = sizeof(code);
+ blit->vp.max_gpr = 4;
+ blit->vp.max_out = 5;
+ blit->vp.out_nr = 2;
+ blit->vp.out[0].mask = 0x3;
+ blit->vp.out[0].sn = TGSI_SEMANTIC_POSITION;
+ blit->vp.out[1].hw = 2;
+ blit->vp.out[1].mask = 0x7;
+ blit->vp.out[1].sn = TGSI_SEMANTIC_GENERIC;
+ blit->vp.out[1].si = 0;
+ blit->vp.vp.attrs[0] = 0x73;
+ blit->vp.vp.psiz = 0x40;
+ blit->vp.vp.edgeflag = 0x40;
+}
+
+void *
+nv50_blitter_make_fp(struct pipe_context *pipe,
+ unsigned mode,
+ enum pipe_texture_target ptarg)
+{
+ struct ureg_program *ureg;
+ struct ureg_src tc;
+ struct ureg_dst out;
+ struct ureg_dst data;
+
+ const unsigned target = nv50_blit_get_tgsi_texture_target(ptarg);
+
+ boolean tex_rgbaz = FALSE;
+ boolean tex_s = FALSE;
+ boolean cvt_un8 = FALSE;
+
+ if (mode != NV50_BLIT_MODE_PASS &&
+ mode != NV50_BLIT_MODE_Z24X8 &&
+ mode != NV50_BLIT_MODE_X8Z24)
+ tex_s = TRUE;
+
+ if (mode != NV50_BLIT_MODE_X24S8 &&
+ mode != NV50_BLIT_MODE_S8X24 &&
+ mode != NV50_BLIT_MODE_XS)
+ tex_rgbaz = TRUE;
+
+ if (mode != NV50_BLIT_MODE_PASS &&
+ mode != NV50_BLIT_MODE_ZS &&
+ mode != NV50_BLIT_MODE_XS)
+ cvt_un8 = TRUE;
+
+ ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ if (!ureg)
+ return NULL;
+
+ out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+ tc = ureg_DECL_fs_input(
+ ureg, TGSI_SEMANTIC_GENERIC, 0, TGSI_INTERPOLATE_LINEAR);
+
+ data = ureg_DECL_temporary(ureg);
+
+ if (tex_s) {
+ ureg_TEX(ureg, ureg_writemask(data, TGSI_WRITEMASK_X),
+ target, tc, ureg_DECL_sampler(ureg, 1));
+ ureg_MOV(ureg, ureg_writemask(data, TGSI_WRITEMASK_Y),
+ ureg_scalar(ureg_src(data), TGSI_SWIZZLE_X));
+ }
+ if (tex_rgbaz) {
+ const unsigned mask = (mode == NV50_BLIT_MODE_PASS) ?
+ TGSI_WRITEMASK_XYZW : TGSI_WRITEMASK_X;
+ ureg_TEX(ureg, ureg_writemask(data, mask),
+ target, tc, ureg_DECL_sampler(ureg, 0));
+ }
+
+ if (cvt_un8) {
+ struct ureg_src mask;
+ struct ureg_src scale;
+ struct ureg_dst outz;
+ struct ureg_dst outs;
+ struct ureg_dst zdst3 = ureg_writemask(data, TGSI_WRITEMASK_XYZ);
+ struct ureg_dst zdst = ureg_writemask(data, TGSI_WRITEMASK_X);
+ struct ureg_dst sdst = ureg_writemask(data, TGSI_WRITEMASK_Y);
+ struct ureg_src zsrc3 = ureg_src(data);
+ struct ureg_src zsrc = ureg_scalar(zsrc3, TGSI_SWIZZLE_X);
+ struct ureg_src ssrc = ureg_scalar(zsrc3, TGSI_SWIZZLE_Y);
+ struct ureg_src zshuf;
+
+ mask = ureg_imm3u(ureg, 0x0000ff, 0x00ff00, 0xff0000);
+ scale = ureg_imm4f(ureg,
+ 1.0f / 0x0000ff, 1.0f / 0x00ff00, 1.0f / 0xff0000,
+ (1 << 24) - 1);
+
+ if (mode == NV50_BLIT_MODE_Z24S8 ||
+ mode == NV50_BLIT_MODE_X24S8 ||
+ mode == NV50_BLIT_MODE_Z24X8) {
+ outz = ureg_writemask(out, TGSI_WRITEMASK_XYZ);
+ outs = ureg_writemask(out, TGSI_WRITEMASK_W);
+ zshuf = ureg_src(data);
+ } else {
+ outz = ureg_writemask(out, TGSI_WRITEMASK_YZW);
+ outs = ureg_writemask(out, TGSI_WRITEMASK_X);
+ zshuf = ureg_swizzle(zsrc3, TGSI_SWIZZLE_W,
+ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z);
+ }
+
+ if (tex_s) {
+ ureg_I2F(ureg, sdst, ssrc);
+ ureg_MUL(ureg, outs, ssrc, ureg_scalar(scale, TGSI_SWIZZLE_X));
+ }
+
+ if (tex_rgbaz) {
+ ureg_MUL(ureg, zdst, zsrc, ureg_scalar(scale, TGSI_SWIZZLE_W));
+ ureg_F2I(ureg, zdst, zsrc);
+ ureg_AND(ureg, zdst3, zsrc, mask);
+ ureg_I2F(ureg, zdst3, zsrc3);
+ ureg_MUL(ureg, zdst3, zsrc3, scale);
+ ureg_MOV(ureg, outz, zshuf);
+ }
+ } else {
+ unsigned mask = TGSI_WRITEMASK_XYZW;
+
+ if (mode != NV50_BLIT_MODE_PASS) {
+ mask &= ~TGSI_WRITEMASK_ZW;
+ if (!tex_s)
+ mask = TGSI_WRITEMASK_X;
+ if (!tex_rgbaz)
+ mask = TGSI_WRITEMASK_Y;
+ }
+ ureg_MOV(ureg, ureg_writemask(out, mask), ureg_src(data));
+ }
+ ureg_END(ureg);
+
+ return ureg_create_shader_and_destroy(ureg, pipe);
+}
+
+static void
+nv50_blitter_make_sampler(struct nv50_blitter *blit)
+{
+ /* clamp to edge, min/max lod = 0, nearest filtering */
+
+ blit->sampler[0].id = -1;
+
+ blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED |
+ (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) |
+ (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) |
+ (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT);
+ blit->sampler[0].tsc[1] =
+ NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE;
+
+ /* clamp to edge, min/max lod = 0, bilinear filtering */
+
+ blit->sampler[1].id = -1;
+
+ blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0];
+ blit->sampler[1].tsc[1] =
+ NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE;
+}
+
+unsigned
+nv50_blit_select_mode(const struct pipe_blit_info *info)
+{
+ const unsigned mask = info->mask;
+
+ switch (info->dst.resource->format) {
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ switch (mask & PIPE_MASK_ZS) {
+ case PIPE_MASK_ZS: return NV50_BLIT_MODE_Z24S8;
+ case PIPE_MASK_Z: return NV50_BLIT_MODE_Z24X8;
+ default:
+ return NV50_BLIT_MODE_X24S8;
+ }
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ switch (mask & PIPE_MASK_ZS) {
+ case PIPE_MASK_ZS: return NV50_BLIT_MODE_S8Z24;
+ case PIPE_MASK_Z: return NV50_BLIT_MODE_X8Z24;
+ default:
+ return NV50_BLIT_MODE_S8X24;
+ }
+ case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ switch (mask & PIPE_MASK_ZS) {
+ case PIPE_MASK_ZS: return NV50_BLIT_MODE_ZS;
+ case PIPE_MASK_Z: return NV50_BLIT_MODE_PASS;
+ default:
+ return NV50_BLIT_MODE_XS;
+ }
+ default:
+ return NV50_BLIT_MODE_PASS;
+ }
+}
+
+static void
+nv50_blit_select_fp(struct nv50_blitctx *ctx, const struct pipe_blit_info *info)
+{
+ struct nv50_blitter *blitter = ctx->nv50->screen->blitter;
+
+ const enum pipe_texture_target ptarg =
+ nv50_blit_reinterpret_pipe_texture_target(info->src.resource->target);
+
+ const unsigned targ = nv50_blit_texture_type(ptarg);
+ const unsigned mode = ctx->mode;
+
+ if (!blitter->fp[targ][mode]) {
+ pipe_mutex_lock(blitter->mutex);
+ if (!blitter->fp[targ][mode])
+ blitter->fp[targ][mode] =
+ nv50_blitter_make_fp(&ctx->nv50->base.pipe, mode, ptarg);
+ pipe_mutex_unlock(blitter->mutex);
+ }
+ ctx->fp = blitter->fp[targ][mode];
+}
+
+static void
+nv50_blit_set_dst(struct nv50_blitctx *ctx,
+ struct pipe_resource *res, unsigned level, unsigned layer,
+ enum pipe_format format)
+{
+ struct nv50_context *nv50 = ctx->nv50;
+ struct pipe_context *pipe = &nv50->base.pipe;
+ struct pipe_surface templ;
+
+ if (util_format_is_depth_or_stencil(format))
+ templ.format = nv50_blit_zeta_to_colour_format(format);
+ else
+ templ.format = format;
+
+ templ.u.tex.level = level;
+ templ.u.tex.first_layer = templ.u.tex.last_layer = layer;
+
+ if (layer == -1) {
+ templ.u.tex.first_layer = 0;
+ templ.u.tex.last_layer =
+ (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1;
+ }
+
+ nv50->framebuffer.cbufs[0] = nv50_miptree_surface_new(pipe, res, &templ);
+ nv50->framebuffer.nr_cbufs = 1;
+ nv50->framebuffer.zsbuf = NULL;
+ nv50->framebuffer.width = nv50->framebuffer.cbufs[0]->width;
+ nv50->framebuffer.height = nv50->framebuffer.cbufs[0]->height;
+}
+
+static void
+nv50_blit_set_src(struct nv50_blitctx *blit,
+ struct pipe_resource *res, unsigned level, unsigned layer,
+ enum pipe_format format, const uint8_t filter)
+{
+ struct nv50_context *nv50 = blit->nv50;
+ struct pipe_context *pipe = &nv50->base.pipe;
+ struct pipe_sampler_view templ;
+ uint32_t flags;
+ enum pipe_texture_target target;
+
+ target = nv50_blit_reinterpret_pipe_texture_target(res->target);
+
+ templ.format = format;
+ templ.u.tex.first_level = templ.u.tex.last_level = level;
+ templ.u.tex.first_layer = templ.u.tex.last_layer = layer;
+ templ.swizzle_r = PIPE_SWIZZLE_RED;
+ templ.swizzle_g = PIPE_SWIZZLE_GREEN;
+ templ.swizzle_b = PIPE_SWIZZLE_BLUE;
+ templ.swizzle_a = PIPE_SWIZZLE_ALPHA;
+
+ if (layer == -1) {
+ templ.u.tex.first_layer = 0;
+ templ.u.tex.last_layer =
+ (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1;
+ }
+
+ flags = res->last_level ? 0 : NV50_TEXVIEW_SCALED_COORDS;
+ flags |= NV50_TEXVIEW_ACCESS_RESOLVE;
+ if (filter && res->nr_samples == 8)
+ flags |= NV50_TEXVIEW_FILTER_MSAA8;
+
+ nv50->textures[2][0] = nv50_create_texture_view(
+ pipe, res, &templ, flags, target);
+ nv50->textures[2][1] = NULL;
+
+ nv50->num_textures[0] = nv50->num_textures[1] = 0;
+ nv50->num_textures[2] = 1;
+
+ templ.format = nv50_zs_to_s_format(format);
+ if (templ.format != res->format) {
+ nv50->textures[2][1] = nv50_create_texture_view(
+ pipe, res, &templ, flags, target);
+ nv50->num_textures[2] = 2;
+ }
+}
+
+static void
+nv50_blitctx_prepare_state(struct nv50_blitctx *blit)
+{
+ struct nouveau_pushbuf *push = blit->nv50->base.pushbuf;
+
+ if (blit->nv50->cond_query) {
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
+ }
+
+ /* blend state */
+ BEGIN_NV04(push, NV50_3D(COLOR_MASK(0)), 1);
+ PUSH_DATA (push, blit->color_mask);
+ BEGIN_NV04(push, NV50_3D(BLEND_ENABLE(0)), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(LOGIC_OP_ENABLE), 1);
+ PUSH_DATA (push, 0);
+
+ /* rasterizer state */
+#ifndef NV50_SCISSORS_CLIPPING
+ BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(0)), 1);
+ PUSH_DATA (push, 1);
+#endif
+ BEGIN_NV04(push, NV50_3D(VERTEX_TWO_SIDE_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(MSAA_MASK(0)), 4);
+ PUSH_DATA (push, 0xffff);
+ PUSH_DATA (push, 0xffff);
+ PUSH_DATA (push, 0xffff);
+ PUSH_DATA (push, 0xffff);
+ BEGIN_NV04(push, NV50_3D(POLYGON_MODE_FRONT), 3);
+ PUSH_DATA (push, NV50_3D_POLYGON_MODE_FRONT_FILL);
+ PUSH_DATA (push, NV50_3D_POLYGON_MODE_BACK_FILL);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(CULL_FACE_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(POLYGON_STIPPLE_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(POLYGON_OFFSET_FILL_ENABLE), 1);
+ PUSH_DATA (push, 0);
+
+ /* zsa state */
+ BEGIN_NV04(push, NV50_3D(DEPTH_TEST_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(STENCIL_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(ALPHA_TEST_ENABLE), 1);
+ PUSH_DATA (push, 0);
+}
+
+static void
+nv50_blitctx_pre_blit(struct nv50_blitctx *ctx)
+{
+ struct nv50_context *nv50 = ctx->nv50;
+ struct nv50_blitter *blitter = nv50->screen->blitter;
+ int s;
+
+ ctx->saved.fb.width = nv50->framebuffer.width;
+ ctx->saved.fb.height = nv50->framebuffer.height;
+ ctx->saved.fb.nr_cbufs = nv50->framebuffer.nr_cbufs;
+ ctx->saved.fb.cbufs[0] = nv50->framebuffer.cbufs[0];
+ ctx->saved.fb.zsbuf = nv50->framebuffer.zsbuf;
+
+ ctx->saved.rast = nv50->rast;
+
+ ctx->saved.vp = nv50->vertprog;
+ ctx->saved.gp = nv50->gmtyprog;
+ ctx->saved.fp = nv50->fragprog;
+
+ nv50->rast = &ctx->rast;
+
+ nv50->vertprog = &blitter->vp;
+ nv50->gmtyprog = NULL;
+ nv50->fragprog = ctx->fp;
+
+ for (s = 0; s < 3; ++s) {
+ ctx->saved.num_textures[s] = nv50->num_textures[s];
+ ctx->saved.num_samplers[s] = nv50->num_samplers[s];
+ }
+ ctx->saved.texture[0] = nv50->textures[2][0];
+ ctx->saved.texture[1] = nv50->textures[2][1];
+ ctx->saved.sampler[0] = nv50->samplers[2][0];
+ ctx->saved.sampler[1] = nv50->samplers[2][1];
+
+ nv50->samplers[2][0] = &blitter->sampler[ctx->filter];
+ nv50->samplers[2][1] = &blitter->sampler[ctx->filter];
+
+ nv50->num_samplers[0] = nv50->num_samplers[1] = 0;
+ nv50->num_samplers[2] = 2;
+
+ ctx->saved.dirty = nv50->dirty;
+
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+
+ nv50->dirty =
+ NV50_NEW_FRAMEBUFFER |
+ NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | NV50_NEW_GMTYPROG |
+ NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS;
+}
+
+static void
+nv50_blitctx_post_blit(struct nv50_blitctx *blit)
+{
+ struct nv50_context *nv50 = blit->nv50;
+ int s;
+
+ pipe_surface_reference(&nv50->framebuffer.cbufs[0], NULL);
+
+ nv50->framebuffer.width = blit->saved.fb.width;
+ nv50->framebuffer.height = blit->saved.fb.height;
+ nv50->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs;
+ nv50->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0];
+ nv50->framebuffer.zsbuf = blit->saved.fb.zsbuf;
+
+ nv50->rast = blit->saved.rast;
+
+ nv50->vertprog = blit->saved.vp;
+ nv50->gmtyprog = blit->saved.gp;
+ nv50->fragprog = blit->saved.fp;
+
+ pipe_sampler_view_reference(&nv50->textures[2][0], NULL);
+ pipe_sampler_view_reference(&nv50->textures[2][1], NULL);
+
+ for (s = 0; s < 3; ++s) {
+ nv50->num_textures[s] = blit->saved.num_textures[s];
+ nv50->num_samplers[s] = blit->saved.num_samplers[s];
+ }
+ nv50->textures[2][0] = blit->saved.texture[0];
+ nv50->textures[2][1] = blit->saved.texture[1];
+ nv50->samplers[2][0] = blit->saved.sampler[0];
+ nv50->samplers[2][1] = blit->saved.sampler[1];
+
+ if (nv50->cond_query)
+ nv50->base.pipe.render_condition(&nv50->base.pipe, nv50->cond_query,
+ nv50->cond_cond, nv50->cond_mode);
+
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+
+ nv50->dirty = blit->saved.dirty |
+ (NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR | NV50_NEW_SAMPLE_MASK |
+ NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_BLEND |
+ NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS |
+ NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_FRAGPROG);
+}
+
+
+static void
+nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
+{
+ struct nv50_blitctx *blit = nv50->blit;
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct pipe_resource *src = info->src.resource;
+ struct pipe_resource *dst = info->dst.resource;
+ int32_t minx, maxx, miny, maxy;
+ int32_t i;
+ float x0, x1, y0, y1, z;
+ float dz;
+ float x_range, y_range;
+
+ blit->mode = nv50_blit_select_mode(info);
+ blit->color_mask = nv50_blit_derive_color_mask(info);
+ blit->filter = nv50_blit_get_filter(info);
+
+ nv50_blit_select_fp(blit, info);
+ nv50_blitctx_pre_blit(blit);
+
+ nv50_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format);
+ nv50_blit_set_src(blit, src, info->src.level, -1, info->src.format,
+ blit->filter);
+
+ nv50_blitctx_prepare_state(blit);
+
+ nv50_state_validate(nv50, ~0, 36);
+
+ x_range = (float)info->src.box.width / (float)info->dst.box.width;
+ y_range = (float)info->src.box.height / (float)info->dst.box.height;
+
+ x0 = (float)info->src.box.x - x_range * (float)info->dst.box.x;
+ y0 = (float)info->src.box.y - y_range * (float)info->dst.box.y;
+
+ x1 = x0 + 16384.0f * x_range;
+ y1 = y0 + 16384.0f * y_range;
+
+ x0 *= (float)(1 << nv50_miptree(src)->ms_x);
+ x1 *= (float)(1 << nv50_miptree(src)->ms_x);
+ y0 *= (float)(1 << nv50_miptree(src)->ms_y);
+ y1 *= (float)(1 << nv50_miptree(src)->ms_y);
+
+ if (src->last_level > 0) {
+ /* If there are mip maps, GPU always assumes normalized coordinates. */
+ const unsigned l = info->src.level;
+ const float fh = u_minify(src->width0 << nv50_miptree(src)->ms_x, l);
+ const float fv = u_minify(src->height0 << nv50_miptree(src)->ms_y, l);
+ x0 /= fh;
+ x1 /= fh;
+ y0 /= fv;
+ y1 /= fv;
+ }
+
+ /* XXX: multiply by 6 for cube arrays ? */
+ dz = (float)info->src.box.depth / (float)info->dst.box.depth;
+ z = (float)info->src.box.z;
+ if (nv50_miptree(src)->layout_3d)
+ z += 0.5f * dz;
+
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+ PUSH_DATA (push, 0x1);
+
+ /* Draw a large triangle in screen coordinates covering the whole
+ * render target, with scissors defining the destination region.
+ * The vertex is supplied with non-normalized texture coordinates
+ * arranged in a way to yield the desired offset and scale.
+ */
+
+ minx = info->dst.box.x;
+ maxx = info->dst.box.x + info->dst.box.width;
+ miny = info->dst.box.y;
+ maxy = info->dst.box.y + info->dst.box.height;
+ if (info->scissor_enable) {
+ minx = MAX2(minx, info->scissor.minx);
+ maxx = MIN2(maxx, info->scissor.maxx);
+ miny = MAX2(miny, info->scissor.miny);
+ maxy = MIN2(maxy, info->scissor.maxy);
+ }
+ BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2);
+ PUSH_DATA (push, (maxx << 16) | minx);
+ PUSH_DATA (push, (maxy << 16) | miny);
+
+ for (i = 0; i < info->dst.box.depth; ++i, z += dz) {
+ if (info->dst.box.z + i) {
+ BEGIN_NV04(push, NV50_3D(LAYER), 1);
+ PUSH_DATA (push, info->dst.box.z + i);
+ }
+ PUSH_SPACE(push, 32);
+ BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES);
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3);
+ PUSH_DATAf(push, x0);
+ PUSH_DATAf(push, y0);
+ PUSH_DATAf(push, z);
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3);
+ PUSH_DATAf(push, x1);
+ PUSH_DATAf(push, y0);
+ PUSH_DATAf(push, z);
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2);
+ PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_x);
+ PUSH_DATAf(push, 0.0f);
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3);
+ PUSH_DATAf(push, x0);
+ PUSH_DATAf(push, y1);
+ PUSH_DATAf(push, z);
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_y);
+ BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
+ PUSH_DATA (push, 0);
+ }
+ if (info->dst.box.z + info->dst.box.depth - 1) {
+ BEGIN_NV04(push, NV50_3D(LAYER), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ /* re-enable normally constant state */
+
+ BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
+ PUSH_DATA (push, 1);
+
+ nv50_blitctx_post_blit(blit);
+}
+
+static void
+nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_miptree *dst = nv50_miptree(info->dst.resource);
+ struct nv50_miptree *src = nv50_miptree(info->src.resource);
+ const int32_t srcx_adj = info->src.box.width < 0 ? -1 : 0;
+ const int32_t srcy_adj = info->src.box.height < 0 ? -1 : 0;
+ const int32_t dz = info->dst.box.z;
+ const int32_t sz = info->src.box.z;
+ uint32_t dstw, dsth;
+ int32_t dstx, dsty;
+ int64_t srcx, srcy;
+ int64_t du_dx, dv_dy;
+ int i;
+ uint32_t mode;
+ uint32_t mask = nv50_blit_eng2d_get_mask(info);
+ boolean b;
+
+ mode = nv50_blit_get_filter(info) ?
+ NV50_2D_BLIT_CONTROL_FILTER_BILINEAR :
+ NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE;
+ mode |= (src->base.base.nr_samples > dst->base.base.nr_samples) ?
+ NV50_2D_BLIT_CONTROL_ORIGIN_CORNER : NV50_2D_BLIT_CONTROL_ORIGIN_CENTER;
+
+ du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width;
+ dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height;
+
+ b = info->dst.format == info->src.format;
+ nv50_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b);
+ nv50_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b);
+
+ if (info->scissor_enable) {
+ BEGIN_NV04(push, NV50_2D(CLIP_X), 5);
+ PUSH_DATA (push, info->scissor.minx << dst->ms_x);
+ PUSH_DATA (push, info->scissor.miny << dst->ms_y);
+ PUSH_DATA (push, (info->scissor.maxx - info->scissor.minx) << dst->ms_x);
+ PUSH_DATA (push, (info->scissor.maxy - info->scissor.miny) << dst->ms_y);
+ PUSH_DATA (push, 1); /* enable */
+ }
+
+ if (mask != 0xffffffff) {
+ BEGIN_NV04(push, NV50_2D(ROP), 1);
+ PUSH_DATA (push, 0xca); /* DPSDxax */
+ BEGIN_NV04(push, NV50_2D(PATTERN_COLOR_FORMAT), 1);
+ PUSH_DATA (push, NV50_2D_PATTERN_COLOR_FORMAT_32BPP);
+ BEGIN_NV04(push, NV50_2D(PATTERN_COLOR(0)), 4);
+ PUSH_DATA (push, 0x00000000);
+ PUSH_DATA (push, mask);
+ PUSH_DATA (push, 0xffffffff);
+ PUSH_DATA (push, 0xffffffff);
+ BEGIN_NV04(push, NV50_2D(OPERATION), 1);
+ PUSH_DATA (push, NV50_2D_OPERATION_ROP);
+ } else
+ if (info->src.format != info->dst.format) {
+ if (info->src.format == PIPE_FORMAT_R8_UNORM ||
+ info->src.format == PIPE_FORMAT_R16_UNORM ||
+ info->src.format == PIPE_FORMAT_R16_FLOAT ||
+ info->src.format == PIPE_FORMAT_R32_FLOAT) {
+ mask = 0xffff0000; /* also makes condition for OPERATION reset true */
+ BEGIN_NV04(push, NV50_2D(BETA4), 2);
+ PUSH_DATA (push, mask);
+ PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY_PREMULT);
+ }
+ }
+
+ if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) {
+ /* ms_x is always >= ms_y */
+ du_dx <<= src->ms_x - dst->ms_x;
+ dv_dy <<= src->ms_y - dst->ms_y;
+ } else {
+ du_dx >>= dst->ms_x - src->ms_x;
+ dv_dy >>= dst->ms_y - src->ms_y;
+ }
+
+ srcx = (int64_t)(info->src.box.x + srcx_adj) << (src->ms_x + 32);
+ srcy = (int64_t)(info->src.box.y + srcy_adj) << (src->ms_y + 32);
+
+ if (src->base.base.nr_samples > dst->base.base.nr_samples) {
+ /* center src coorinates for proper MS resolve filtering */
+ srcx += (int64_t)src->ms_x << 32;
+ srcy += (int64_t)src->ms_y << 32;
+ }
+
+ dstx = info->dst.box.x << dst->ms_x;
+ dsty = info->dst.box.y << dst->ms_y;
+
+ dstw = info->dst.box.width << dst->ms_x;
+ dsth = info->dst.box.height << dst->ms_y;
+
+ if (dstx < 0) {
+ dstw += dstx;
+ srcx -= du_dx * dstx;
+ dstx = 0;
+ }
+ if (dsty < 0) {
+ dsth += dsty;
+ srcy -= dv_dy * dsty;
+ dsty = 0;
+ }
+
+ BEGIN_NV04(push, NV50_2D(BLIT_CONTROL), 1);
+ PUSH_DATA (push, mode);
+ BEGIN_NV04(push, NV50_2D(BLIT_DST_X), 4);
+ PUSH_DATA (push, dstx);
+ PUSH_DATA (push, dsty);
+ PUSH_DATA (push, dstw);
+ PUSH_DATA (push, dsth);
+ BEGIN_NV04(push, NV50_2D(BLIT_DU_DX_FRACT), 4);
+ PUSH_DATA (push, du_dx);
+ PUSH_DATA (push, du_dx >> 32);
+ PUSH_DATA (push, dv_dy);
+ PUSH_DATA (push, dv_dy >> 32);
+
+ BCTX_REFN(nv50->bufctx, 2D, &dst->base, WR);
+ BCTX_REFN(nv50->bufctx, 2D, &src->base, RD);
+ nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx);
+ if (nouveau_pushbuf_validate(nv50->base.pushbuf))
+ return;
+
+ for (i = 0; i < info->dst.box.depth; ++i) {
+ if (i > 0) {
+ /* no scaling in z-direction possible for eng2d blits */
+ if (dst->layout_3d) {
+ BEGIN_NV04(push, NV50_2D(DST_LAYER), 1);
+ PUSH_DATA (push, info->dst.box.z + i);
+ } else {
+ const unsigned z = info->dst.box.z + i;
+ BEGIN_NV04(push, NV50_2D(DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, dst->base.address + z * dst->layer_stride);
+ PUSH_DATA (push, dst->base.address + z * dst->layer_stride);
+ }
+ if (src->layout_3d) {
+ /* not possible because of depth tiling */
+ assert(0);
+ } else {
+ const unsigned z = info->src.box.z + i;
+ BEGIN_NV04(push, NV50_2D(SRC_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, src->base.address + z * src->layer_stride);
+ PUSH_DATA (push, src->base.address + z * src->layer_stride);
+ }
+ BEGIN_NV04(push, NV50_2D(BLIT_SRC_Y_INT), 1); /* trigger */
+ PUSH_DATA (push, srcy >> 32);
+ } else {
+ BEGIN_NV04(push, NV50_2D(BLIT_SRC_X_FRACT), 4);
+ PUSH_DATA (push, srcx);
+ PUSH_DATA (push, srcx >> 32);
+ PUSH_DATA (push, srcy);
+ PUSH_DATA (push, srcy >> 32);
+ }
+ }
+ nv50_bufctx_fence(nv50->bufctx, FALSE);
+
+ nouveau_bufctx_reset(nv50->bufctx, NV50_BIND_2D);
+
+ if (info->scissor_enable) {
+ BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ }
+ if (mask != 0xffffffff) {
+ BEGIN_NV04(push, NV50_2D(OPERATION), 1);
+ PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY);
+ }
+}
+
+static void
+nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ boolean eng3d = FALSE;
+
+ if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
+ if (!(info->mask & PIPE_MASK_ZS))
+ return;
+ if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT ||
+ info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+ eng3d = TRUE;
+ if (info->filter != PIPE_TEX_FILTER_NEAREST)
+ eng3d = TRUE;
+ } else {
+ if (!(info->mask & PIPE_MASK_RGBA))
+ return;
+ if (info->mask != PIPE_MASK_RGBA)
+ eng3d = TRUE;
+ }
+
+ if (nv50_miptree(info->src.resource)->layout_3d) {
+ eng3d = TRUE;
+ } else
+ if (info->src.box.depth != info->dst.box.depth) {
+ eng3d = TRUE;
+ debug_printf("blit: cannot filter array or cube textures in z direction");
+ }
+
+ if (!eng3d && info->dst.format != info->src.format) {
+ if (!nv50_2d_dst_format_faithful(info->dst.format) ||
+ !nv50_2d_src_format_faithful(info->src.format)) {
+ eng3d = TRUE;
+ } else
+ if (!nv50_2d_src_format_faithful(info->src.format)) {
+ if (!util_format_is_luminance(info->src.format)) {
+ if (util_format_is_intensity(info->src.format))
+ eng3d = TRUE;
+ else
+ if (!nv50_2d_dst_format_ops_supported(info->dst.format))
+ eng3d = TRUE;
+ else
+ eng3d = !nv50_2d_format_supported(info->src.format);
+ }
+ } else
+ if (util_format_is_luminance_alpha(info->src.format))
+ eng3d = TRUE;
+ }
+
+ if (info->src.resource->nr_samples == 8 &&
+ info->dst.resource->nr_samples <= 1)
+ eng3d = TRUE;
+
+ /* FIXME: can't make this work with eng2d anymore */
+ if (info->src.resource->nr_samples > 1 ||
+ info->dst.resource->nr_samples > 1)
+ eng3d = TRUE;
+
+ /* FIXME: find correct src coordinate adjustments */
+ if ((info->src.box.width != info->dst.box.width &&
+ info->src.box.width != -info->dst.box.width) ||
+ (info->src.box.height != info->dst.box.height &&
+ info->src.box.height != -info->dst.box.height))
+ eng3d = TRUE;
+
+ if (!eng3d)
+ nv50_blit_eng2d(nv50, info);
+ else
+ nv50_blit_3d(nv50, info);
+}
+
+boolean
+nv50_blitter_create(struct nv50_screen *screen)
+{
+ screen->blitter = CALLOC_STRUCT(nv50_blitter);
+ if (!screen->blitter) {
+ NOUVEAU_ERR("failed to allocate blitter struct\n");
+ return FALSE;
+ }
+
+ pipe_mutex_init(screen->blitter->mutex);
+
+ nv50_blitter_make_vp(screen->blitter);
+ nv50_blitter_make_sampler(screen->blitter);
+
+ return TRUE;
+}
+
+void
+nv50_blitter_destroy(struct nv50_screen *screen)
+{
+ struct nv50_blitter *blitter = screen->blitter;
+ unsigned i, m;
+
+ for (i = 0; i < NV50_BLIT_MAX_TEXTURE_TYPES; ++i) {
+ for (m = 0; m < NV50_BLIT_MODES; ++m) {
+ struct nv50_program *prog = blitter->fp[i][m];
+ if (prog) {
+ nv50_program_destroy(NULL, prog);
+ FREE((void *)prog->pipe.tokens);
+ FREE(prog);
+ }
+ }
+ }
+
+ FREE(blitter);
+}
+
+boolean
+nv50_blitctx_create(struct nv50_context *nv50)
+{
+ nv50->blit = CALLOC_STRUCT(nv50_blitctx);
+ if (!nv50->blit) {
+ NOUVEAU_ERR("failed to allocate blit context\n");
+ return FALSE;
+ }
+
+ nv50->blit->nv50 = nv50;
+
+ nv50->blit->rast.pipe.half_pixel_center = 1;
+
+ return TRUE;
+}
+
+void
+nv50_init_surface_functions(struct nv50_context *nv50)
+{
+ struct pipe_context *pipe = &nv50->base.pipe;
+
+ pipe->resource_copy_region = nv50_resource_copy_region;
+ pipe->blit = nv50_blit;
+ pipe->clear_render_target = nv50_clear_render_target;
+ pipe->clear_depth_stencil = nv50_clear_depth_stencil;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
new file mode 100644
index 00000000000..9e512928381
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_resource.h"
+#include "nv50/nv50_texture.xml.h"
+#include "nv50/nv50_defs.xml.h"
+
+#include "util/u_format.h"
+
+#define NV50_TIC_0_SWIZZLE__MASK \
+ (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
+ NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
+
+static INLINE uint32_t
+nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int)
+{
+ switch (swz) {
+ case PIPE_SWIZZLE_RED:
+ return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT;
+ case PIPE_SWIZZLE_GREEN:
+ return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT;
+ case PIPE_SWIZZLE_BLUE:
+ return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT;
+ case PIPE_SWIZZLE_ALPHA:
+ return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT;
+ case PIPE_SWIZZLE_ONE:
+ return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT;
+ case PIPE_SWIZZLE_ZERO:
+ default:
+ return NV50_TIC_MAP_ZERO;
+ }
+}
+
+struct pipe_sampler_view *
+nv50_create_sampler_view(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ const struct pipe_sampler_view *templ)
+{
+ uint32_t flags = 0;
+
+ if (res->target == PIPE_TEXTURE_RECT || res->target == PIPE_BUFFER)
+ flags |= NV50_TEXVIEW_SCALED_COORDS;
+
+ return nv50_create_texture_view(pipe, res, templ, flags, res->target);
+}
+
+struct pipe_sampler_view *
+nv50_create_texture_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ,
+ uint32_t flags,
+ enum pipe_texture_target target)
+{
+ const struct util_format_description *desc;
+ uint64_t addr;
+ uint32_t *tic;
+ uint32_t swz[4];
+ uint32_t depth;
+ struct nv50_tic_entry *view;
+ struct nv50_miptree *mt = nv50_miptree(texture);
+ boolean tex_int;
+
+ view = MALLOC_STRUCT(nv50_tic_entry);
+ if (!view)
+ return NULL;
+
+ view->pipe = *templ;
+ view->pipe.reference.count = 1;
+ view->pipe.texture = NULL;
+ view->pipe.context = pipe;
+
+ view->id = -1;
+
+ pipe_resource_reference(&view->pipe.texture, texture);
+
+ tic = &view->tic[0];
+
+ desc = util_format_description(view->pipe.format);
+
+ /* TIC[0] */
+
+ tic[0] = nv50_format_table[view->pipe.format].tic;
+
+ tex_int = util_format_is_pure_integer(view->pipe.format);
+
+ swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int);
+ swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int);
+ swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int);
+ swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int);
+ tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
+ (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
+ (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
+ (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
+ (swz[3] << NV50_TIC_0_MAPA__SHIFT);
+
+ addr = mt->base.address;
+
+ if (mt->base.base.target == PIPE_TEXTURE_1D_ARRAY ||
+ mt->base.base.target == PIPE_TEXTURE_2D_ARRAY) {
+ addr += view->pipe.u.tex.first_layer * mt->layer_stride;
+ depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
+ } else {
+ depth = mt->base.base.depth0;
+ }
+
+ tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER;
+
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
+
+ if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
+ tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
+
+ if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
+ if (target == PIPE_BUFFER) {
+ addr += view->pipe.u.buf.first_element * desc->block.bits / 8;
+ tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER;
+ tic[3] = 0;
+ tic[4] = /* width */
+ view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1;
+ tic[5] = 0;
+ } else {
+ tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT;
+ tic[3] = mt->level[0].pitch;
+ tic[4] = mt->base.base.width0;
+ tic[5] = (1 << 16) | mt->base.base.height0;
+ }
+ tic[6] =
+ tic[7] = 0;
+ tic[1] = addr;
+ tic[2] |= addr >> 32;
+ return &view->pipe;
+ }
+
+ tic[1] = addr;
+ tic[2] |= (addr >> 32) & 0xff;
+
+ tic[2] |=
+ ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |
+ ((mt->level[0].tile_mode & 0xf00) << (25 - 8));
+
+ switch (target) {
+ case PIPE_TEXTURE_1D:
+ tic[2] |= NV50_TIC_2_TARGET_1D;
+ break;
+ case PIPE_TEXTURE_2D:
+ tic[2] |= NV50_TIC_2_TARGET_2D;
+ break;
+ case PIPE_TEXTURE_RECT:
+ tic[2] |= NV50_TIC_2_TARGET_RECT;
+ break;
+ case PIPE_TEXTURE_3D:
+ tic[2] |= NV50_TIC_2_TARGET_3D;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ depth /= 6;
+ tic[2] |= NV50_TIC_2_TARGET_CUBE;
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY;
+ break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY;
+ break;
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ depth /= 6;
+ tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
+ break;
+ case PIPE_BUFFER:
+ assert(0); /* should be linear and handled above ! */
+ tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR;
+ break;
+ default:
+ NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target);
+ return FALSE;
+ }
+
+ tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
+
+ tic[4] = (1 << 31) | (mt->base.base.width0 << mt->ms_x);
+
+ tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff;
+ tic[5] |= depth << 16;
+ tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;
+
+ tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */
+
+ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+
+ if (unlikely(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS)))
+ if (mt->base.base.last_level)
+ tic[5] &= ~NV50_TIC_5_LAST_LEVEL__MASK;
+
+ return &view->pipe;
+}
+
+static boolean
+nv50_validate_tic(struct nv50_context *nv50, int s)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nouveau_bo *txc = nv50->screen->txc;
+ unsigned i;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nv50->num_textures[s]; ++i) {
+ struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]);
+ struct nv04_resource *res;
+
+ if (!tic) {
+ BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
+ PUSH_DATA (push, (i << 1) | 0);
+ continue;
+ }
+ res = &nv50_miptree(tic->pipe.texture)->base;
+
+ if (tic->id < 0) {
+ tic->id = nv50_screen_tic_alloc(nv50->screen, tic);
+
+ BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
+ PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
+ PUSH_DATA (push, 262144);
+ PUSH_DATA (push, 65536);
+ PUSH_DATA (push, 1);
+ PUSH_DATAh(push, txc->offset);
+ PUSH_DATA (push, txc->offset);
+ BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
+ PUSH_DATA (push, 32);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, tic->id * 32);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ BEGIN_NI04(push, NV50_2D(SIFC_DATA), 8);
+ PUSH_DATAp(push, &tic->tic[0], 8);
+
+ need_flush = TRUE;
+ } else
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
+ PUSH_DATA (push, 0x20);
+ }
+
+ nv50->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+
+ res->status &= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ BCTX_REFN(nv50->bufctx_3d, TEXTURES, res, RD);
+
+ BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
+ PUSH_DATA (push, (tic->id << 9) | (i << 1) | 1);
+ }
+ for (; i < nv50->state.num_textures[s]; ++i) {
+ BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
+ PUSH_DATA (push, (i << 1) | 0);
+ }
+ nv50->state.num_textures[s] = nv50->num_textures[s];
+
+ return need_flush;
+}
+
+void nv50_validate_textures(struct nv50_context *nv50)
+{
+ boolean need_flush;
+
+ need_flush = nv50_validate_tic(nv50, 0);
+ need_flush |= nv50_validate_tic(nv50, 2);
+
+ if (need_flush) {
+ BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TIC_FLUSH), 1);
+ PUSH_DATA (nv50->base.pushbuf, 0);
+ }
+}
+
+static boolean
+nv50_validate_tsc(struct nv50_context *nv50, int s)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ unsigned i;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nv50->num_samplers[s]; ++i) {
+ struct nv50_tsc_entry *tsc = nv50_tsc_entry(nv50->samplers[s][i]);
+
+ if (!tsc) {
+ BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
+ PUSH_DATA (push, (i << 4) | 0);
+ continue;
+ }
+ if (tsc->id < 0) {
+ tsc->id = nv50_screen_tsc_alloc(nv50->screen, tsc);
+
+ nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc,
+ 65536 + tsc->id * 32,
+ NOUVEAU_BO_VRAM, 32, tsc->tsc);
+ need_flush = TRUE;
+ }
+ nv50->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
+
+ BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
+ PUSH_DATA (push, (tsc->id << 12) | (i << 4) | 1);
+ }
+ for (; i < nv50->state.num_samplers[s]; ++i) {
+ BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1);
+ PUSH_DATA (push, (i << 4) | 0);
+ }
+ nv50->state.num_samplers[s] = nv50->num_samplers[s];
+
+ return need_flush;
+}
+
+void nv50_validate_samplers(struct nv50_context *nv50)
+{
+ boolean need_flush;
+
+ need_flush = nv50_validate_tsc(nv50, 0);
+ need_flush |= nv50_validate_tsc(nv50, 2);
+
+ if (need_flush) {
+ BEGIN_NV04(nv50->base.pushbuf, NV50_3D(TSC_FLUSH), 1);
+ PUSH_DATA (nv50->base.pushbuf, 0);
+ }
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h
new file mode 100644
index 00000000000..31eab9b5d87
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h
@@ -0,0 +1,306 @@
+#ifndef NV50_TEXTURE_XML
+#define NV50_TEXTURE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/nv50_texture.xml ( 8648 bytes, from 2013-04-13 12:49:11)
+- rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
+- rnndb/nvchipsets.xml ( 3954 bytes, from 2013-03-26 01:26:43)
+- rnndb/nv50_defs.xml ( 16652 bytes, from 2013-04-04 10:57:15)
+
+Copyright (C) 2006-2013 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Koƛcielnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_TIC_MAP_ZERO 0x00000000
+#define NV50_TIC_MAP_C0 0x00000002
+#define NV50_TIC_MAP_C1 0x00000003
+#define NV50_TIC_MAP_C2 0x00000004
+#define NV50_TIC_MAP_C3 0x00000005
+#define NV50_TIC_MAP_ONE_INT 0x00000006
+#define NV50_TIC_MAP_ONE_FLOAT 0x00000007
+#define NV50_TIC_TYPE_SNORM 0x00000001
+#define NV50_TIC_TYPE_UNORM 0x00000002
+#define NV50_TIC_TYPE_SINT 0x00000003
+#define NV50_TIC_TYPE_UINT 0x00000004
+#define NV50_TIC_TYPE_SSCALED 0x00000005
+#define NV50_TIC_TYPE_USCALED 0x00000006
+#define NV50_TIC_TYPE_FLOAT 0x00000007
+#define NV50_TSC_WRAP_REPEAT 0x00000000
+#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001
+#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002
+#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003
+#define NV50_TSC_WRAP_CLAMP 0x00000004
+#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005
+#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006
+#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007
+#define NV50_TIC__SIZE 0x00000020
+#define NV50_TIC_0 0x00000000
+#define NV50_TIC_0_MAPA__MASK 0x38000000
+#define NV50_TIC_0_MAPA__SHIFT 27
+#define NV50_TIC_0_MAPB__MASK 0x07000000
+#define NV50_TIC_0_MAPB__SHIFT 24
+#define NV50_TIC_0_MAPG__MASK 0x00e00000
+#define NV50_TIC_0_MAPG__SHIFT 21
+#define NV50_TIC_0_MAPR__MASK 0x001c0000
+#define NV50_TIC_0_MAPR__SHIFT 18
+#define NV50_TIC_0_TYPE3__MASK 0x00038000
+#define NV50_TIC_0_TYPE3__SHIFT 15
+#define NV50_TIC_0_TYPE2__MASK 0x00007000
+#define NV50_TIC_0_TYPE2__SHIFT 12
+#define NV50_TIC_0_TYPE1__MASK 0x00000e00
+#define NV50_TIC_0_TYPE1__SHIFT 9
+#define NV50_TIC_0_TYPE0__MASK 0x000001c0
+#define NV50_TIC_0_TYPE0__SHIFT 6
+#define NV50_TIC_0_FMT__MASK 0x0000003f
+#define NV50_TIC_0_FMT__SHIFT 0
+#define NV50_TIC_0_FMT_32_32_32_32 0x00000001
+#define NVC0_TIC_0_FMT_32_32_32 0x00000002
+#define NV50_TIC_0_FMT_16_16_16_16 0x00000003
+#define NV50_TIC_0_FMT_32_32 0x00000004
+#define NV50_TIC_0_FMT_32_8_X24 0x00000005
+#define NV50_TIC_0_FMT_8_8_8_8 0x00000008
+#define NV50_TIC_0_FMT_10_10_10_2 0x00000009
+#define NV50_TIC_0_FMT_16_16 0x0000000c
+#define NV50_TIC_0_FMT_24_8 0x0000000d
+#define NV50_TIC_0_FMT_8_24 0x0000000e
+#define NV50_TIC_0_FMT_32 0x0000000f
+#define NV50_TIC_0_FMT_BPTC_FLOAT 0x00000010
+#define NV50_TIC_0_FMT_BPTC_UFLOAT 0x00000011
+#define NV50_TIC_0_FMT_4_4_4_4 0x00000012
+#define NV50_TIC_0_FMT_1_5_5_5 0x00000013
+#define NV50_TIC_0_FMT_5_5_5_1 0x00000014
+#define NV50_TIC_0_FMT_5_6_5 0x00000015
+#define NV50_TIC_0_FMT_5_5_6 0x00000016
+#define NV50_TIC_0_FMT_BPTC 0x00000017
+#define NV50_TIC_0_FMT_8_8 0x00000018
+#define NV50_TIC_0_FMT_16 0x0000001b
+#define NV50_TIC_0_FMT_8 0x0000001d
+#define NV50_TIC_0_FMT_4_4 0x0000001e
+#define NV50_TIC_0_FMT_BITMAP 0x0000001f
+#define NV50_TIC_0_FMT_9_9_9_E5 0x00000020
+#define NV50_TIC_0_FMT_11_11_10 0x00000021
+#define NV50_TIC_0_FMT_U8_YA8_V8_YB8 0x00000022
+#define NV50_TIC_0_FMT_YA8_U8_YB8_V8 0x00000023
+#define NV50_TIC_0_FMT_DXT1 0x00000024
+#define NV50_TIC_0_FMT_DXT3 0x00000025
+#define NV50_TIC_0_FMT_DXT5 0x00000026
+#define NV50_TIC_0_FMT_RGTC1 0x00000027
+#define NV50_TIC_0_FMT_RGTC2 0x00000028
+#define NV50_TIC_0_FMT_S8_Z24 0x00000029
+#define NV50_TIC_0_FMT_Z24_X8 0x0000002a
+#define NV50_TIC_0_FMT_Z24_S8 0x0000002b
+#define NV50_TIC_0_FMT_Z24_C8_MS4_CS4 0x0000002c
+#define NV50_TIC_0_FMT_Z24_C8_MS8_CS8 0x0000002d
+#define NV50_TIC_0_FMT_Z24_C8_MS4_CS12 0x0000002e
+#define NV50_TIC_0_FMT_Z32 0x0000002f
+#define NV50_TIC_0_FMT_Z32_S8_X24 0x00000030
+#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS4 0x00000031
+#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS8_CS8 0x00000032
+#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS4 0x00000033
+#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS8_CS8 0x00000034
+#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS4 0x00000035
+#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS8_CS8 0x00000036
+#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS12 0x00000037
+#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS12 0x00000038
+#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS12 0x00000039
+#define NV50_TIC_0_FMT_Z16 0x0000003a
+
+#define NV50_TIC_1 0x00000004
+#define NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff
+#define NV50_TIC_1_OFFSET_LOW__SHIFT 0
+
+#define NV50_TIC_2 0x00000008
+#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff
+#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0
+#define NV50_TIC_2_COLORSPACE_SRGB 0x00000400
+#define NV50_TIC_2_TARGET__MASK 0x0003c000
+#define NV50_TIC_2_TARGET__SHIFT 14
+#define NV50_TIC_2_TARGET_1D 0x00000000
+#define NV50_TIC_2_TARGET_2D 0x00004000
+#define NV50_TIC_2_TARGET_3D 0x00008000
+#define NV50_TIC_2_TARGET_CUBE 0x0000c000
+#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000
+#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000
+#define NV50_TIC_2_TARGET_BUFFER 0x00018000
+#define NV50_TIC_2_TARGET_RECT 0x0001c000
+#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000
+#define NV50_TIC_2_LINEAR 0x00040000
+#define NV50_TIC_2_TILE_MODE_X__MASK 0x00380000
+#define NV50_TIC_2_TILE_MODE_X__SHIFT 19
+#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000
+#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22
+#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000
+#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25
+#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000
+#define NV50_TIC_2_2D_UNK0258__SHIFT 28
+#define NV50_TIC_2_NO_BORDER 0x40000000
+#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000
+
+#define NV50_TIC_3 0x0000000c
+#define NV50_TIC_3_PITCH__MASK 0xffffffff
+#define NV50_TIC_3_PITCH__SHIFT 0
+
+#define NV50_TIC_4 0x00000010
+#define NV50_TIC_4_WIDTH__MASK 0xffffffff
+#define NV50_TIC_4_WIDTH__SHIFT 0
+
+#define NV50_TIC_5 0x00000014
+#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000
+#define NV50_TIC_5_LAST_LEVEL__SHIFT 28
+#define NV50_TIC_5_DEPTH__MASK 0x0fff0000
+#define NV50_TIC_5_DEPTH__SHIFT 16
+#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff
+#define NV50_TIC_5_HEIGHT__SHIFT 0
+
+#define NV50_TIC_7 0x0000001c
+#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f
+#define NV50_TIC_7_BASE_LEVEL__SHIFT 0
+#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0
+#define NV50_TIC_7_MAX_LEVEL__SHIFT 4
+#define NV50_TIC_7_MS_MODE__MASK 0x0000f000
+#define NV50_TIC_7_MS_MODE__SHIFT 12
+#define NV50_TIC_7_MS_MODE_MS1 0x00000000
+#define NV50_TIC_7_MS_MODE_MS2 0x00001000
+#define NV50_TIC_7_MS_MODE_MS4 0x00002000
+#define NV50_TIC_7_MS_MODE_MS8 0x00003000
+#define NVA3_TIC_7_MS_MODE_MS8_ALT 0x00004000
+#define NVA3_TIC_7_MS_MODE_MS2_ALT 0x00005000
+#define NVC0_TIC_7_MS_MODE_UNK6 0x00006000
+#define NV50_TIC_7_MS_MODE_MS4_CS4 0x00008000
+#define NV50_TIC_7_MS_MODE_MS4_CS12 0x00009000
+#define NV50_TIC_7_MS_MODE_MS8_CS8 0x0000a000
+#define NVC0_TIC_7_MS_MODE_MS8_CS24 0x0000b000
+
+#define NV50_TSC__SIZE 0x00000020
+#define NV50_TSC_0 0x00000000
+#define NV50_TSC_0_WRAPS__MASK 0x00000007
+#define NV50_TSC_0_WRAPS__SHIFT 0
+#define NV50_TSC_0_WRAPT__MASK 0x00000038
+#define NV50_TSC_0_WRAPT__SHIFT 3
+#define NV50_TSC_0_WRAPR__MASK 0x000001c0
+#define NV50_TSC_0_WRAPR__SHIFT 6
+#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200
+#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00
+#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10
+#define NV50_TSC_0_SRGB_CONVERSION_ALLOWED 0x00002000
+#define NV50_TSC_0_BOX_S__MASK 0x0001c000
+#define NV50_TSC_0_BOX_S__SHIFT 14
+#define NV50_TSC_0_BOX_T__MASK 0x000e0000
+#define NV50_TSC_0_BOX_T__SHIFT 17
+#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000
+#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20
+
+#define NV50_TSC_1 0x00000004
+#define NV50_TSC_1_UNKN_ANISO_15 0x10000000
+#define NV50_TSC_1_UNKN_ANISO_35 0x18000000
+#define NV50_TSC_1_MAGF__MASK 0x00000003
+#define NV50_TSC_1_MAGF__SHIFT 0
+#define NV50_TSC_1_MAGF_NEAREST 0x00000001
+#define NV50_TSC_1_MAGF_LINEAR 0x00000002
+#define NV50_TSC_1_MINF__MASK 0x00000030
+#define NV50_TSC_1_MINF__SHIFT 4
+#define NV50_TSC_1_MINF_NEAREST 0x00000010
+#define NV50_TSC_1_MINF_LINEAR 0x00000020
+#define NV50_TSC_1_MIPF__MASK 0x000000c0
+#define NV50_TSC_1_MIPF__SHIFT 6
+#define NV50_TSC_1_MIPF_NONE 0x00000040
+#define NV50_TSC_1_MIPF_NEAREST 0x00000080
+#define NV50_TSC_1_MIPF_LINEAR 0x000000c0
+#define NVE4_TSC_1_CUBE_SEAMLESS 0x00000200
+#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000
+#define NV50_TSC_1_LOD_BIAS__SHIFT 12
+#define NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS 0x02000000
+
+#define NV50_TSC_2 0x00000008
+#define NV50_TSC_2_MIN_LOD__MASK 0x00000fff
+#define NV50_TSC_2_MIN_LOD__SHIFT 0
+#define NV50_TSC_2_MAX_LOD__MASK 0x00fff000
+#define NV50_TSC_2_MAX_LOD__SHIFT 12
+#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__MASK 0xff000000
+#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__SHIFT 24
+
+#define NV50_TSC_3 0x0000000c
+#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__MASK 0x000ff000
+#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__SHIFT 12
+#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__MASK 0x0ff00000
+#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__SHIFT 20
+
+#define NV50_TSC_4 0x00000010
+#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff
+#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0
+
+#define NV50_TSC_5 0x00000014
+#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff
+#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0
+
+#define NV50_TSC_6 0x00000018
+#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff
+#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0
+
+#define NV50_TSC_7 0x0000001c
+#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 0xffffffff
+#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0
+
+
+#endif /* NV50_TEXTURE_XML */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
new file mode 100644
index 00000000000..a9906829fec
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -0,0 +1,412 @@
+
+#include "util/u_format.h"
+
+#include "nv50/nv50_context.h"
+
+#include "nv50/nv50_defs.xml.h"
+
+struct nv50_transfer {
+ struct pipe_transfer base;
+ struct nv50_m2mf_rect rect[2];
+ uint32_t nblocksx;
+ uint32_t nblocksy;
+};
+
+void
+nv50_m2mf_rect_setup(struct nv50_m2mf_rect *rect,
+ struct pipe_resource *restrict res, unsigned l,
+ unsigned x, unsigned y, unsigned z)
+{
+ struct nv50_miptree *mt = nv50_miptree(res);
+ const unsigned w = u_minify(res->width0, l);
+ const unsigned h = u_minify(res->height0, l);
+
+ rect->bo = mt->base.bo;
+ rect->domain = mt->base.domain;
+ rect->base = mt->level[l].offset;
+ rect->pitch = mt->level[l].pitch;
+ if (util_format_is_plain(res->format)) {
+ rect->width = w << mt->ms_x;
+ rect->height = h << mt->ms_y;
+ rect->x = x << mt->ms_x;
+ rect->y = y << mt->ms_y;
+ } else {
+ rect->width = util_format_get_nblocksx(res->format, w);
+ rect->height = util_format_get_nblocksy(res->format, h);
+ rect->x = util_format_get_nblocksx(res->format, x);
+ rect->y = util_format_get_nblocksy(res->format, y);
+ }
+ rect->tile_mode = mt->level[l].tile_mode;
+ rect->cpp = util_format_get_blocksize(res->format);
+
+ if (mt->layout_3d) {
+ rect->z = z;
+ rect->depth = u_minify(res->depth0, l);
+ } else {
+ rect->base += z * mt->layer_stride;
+ rect->z = 0;
+ rect->depth = 1;
+ }
+}
+
+void
+nv50_m2mf_transfer_rect(struct nv50_context *nv50,
+ const struct nv50_m2mf_rect *dst,
+ const struct nv50_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nouveau_bufctx *bctx = nv50->bufctx;
+ const int cpp = dst->cpp;
+ uint32_t src_ofst = src->base;
+ uint32_t dst_ofst = dst->base;
+ uint32_t height = nblocksy;
+ uint32_t sy = src->y;
+ uint32_t dy = dst->y;
+
+ assert(dst->cpp == src->cpp);
+
+ nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD);
+ nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
+ if (nouveau_bo_memtype(src->bo)) {
+ BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 6);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, src->tile_mode);
+ PUSH_DATA (push, src->width * cpp);
+ PUSH_DATA (push, src->height);
+ PUSH_DATA (push, src->depth);
+ PUSH_DATA (push, src->z);
+ } else {
+ src_ofst += src->y * src->pitch + src->x * cpp;
+
+ BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_PITCH_IN), 1);
+ PUSH_DATA (push, src->pitch);
+ }
+
+ if (nouveau_bo_memtype(dst->bo)) {
+ BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 6);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, dst->tile_mode);
+ PUSH_DATA (push, dst->width * cpp);
+ PUSH_DATA (push, dst->height);
+ PUSH_DATA (push, dst->depth);
+ PUSH_DATA (push, dst->z);
+ } else {
+ dst_ofst += dst->y * dst->pitch + dst->x * cpp;
+
+ BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_PITCH_OUT), 1);
+ PUSH_DATA (push, dst->pitch);
+ }
+
+ while (height) {
+ int line_count = height > 2047 ? 2047 : height;
+
+ BEGIN_NV04(push, NV50_M2MF(OFFSET_IN_HIGH), 2);
+ PUSH_DATAh(push, src->bo->offset + src_ofst);
+ PUSH_DATAh(push, dst->bo->offset + dst_ofst);
+
+ BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_OFFSET_IN), 2);
+ PUSH_DATA (push, src->bo->offset + src_ofst);
+ PUSH_DATA (push, dst->bo->offset + dst_ofst);
+
+ if (nouveau_bo_memtype(src->bo)) {
+ BEGIN_NV04(push, NV50_M2MF(TILING_POSITION_IN), 1);
+ PUSH_DATA (push, (sy << 16) | (src->x * cpp));
+ } else {
+ src_ofst += line_count * src->pitch;
+ }
+ if (nouveau_bo_memtype(dst->bo)) {
+ BEGIN_NV04(push, NV50_M2MF(TILING_POSITION_OUT), 1);
+ PUSH_DATA (push, (dy << 16) | (dst->x * cpp));
+ } else {
+ dst_ofst += line_count * dst->pitch;
+ }
+
+ BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_LINE_LENGTH_IN), 4);
+ PUSH_DATA (push, nblocksx * cpp);
+ PUSH_DATA (push, line_count);
+ PUSH_DATA (push, (1 << 8) | (1 << 0));
+ PUSH_DATA (push, 0);
+
+ height -= line_count;
+ sy += line_count;
+ dy += line_count;
+ }
+
+ nouveau_bufctx_reset(bctx, 0);
+}
+
+void
+nv50_sifc_linear_u8(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, const void *data)
+{
+ struct nv50_context *nv50 = nv50_context(&nv->pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ uint32_t *src = (uint32_t *)data;
+ unsigned count = (size + 3) / 4;
+ unsigned xcoord = offset & 0xff;
+
+ nouveau_bufctx_refn(nv50->bufctx, 0, dst, domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nv50->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ offset &= ~0xff;
+
+ BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
+ PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
+ PUSH_DATA (push, 262144);
+ PUSH_DATA (push, 65536);
+ PUSH_DATA (push, 1);
+ PUSH_DATAh(push, dst->offset + offset);
+ PUSH_DATA (push, dst->offset + offset);
+ BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
+ PUSH_DATA (push, size);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, xcoord);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+
+ while (count) {
+ unsigned nr;
+
+ if (!PUSH_SPACE(push, 16))
+ break;
+ nr = PUSH_AVAIL(push);
+ assert(nr >= 16);
+ nr = MIN2(count, nr - 1);
+ nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
+
+ BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
+ PUSH_DATAp(push, src, nr);
+
+ src += nr;
+ count -= nr;
+ }
+
+ nouveau_bufctx_reset(nv50->bufctx, 0);
+}
+
+void
+nv50_m2mf_copy_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+ unsigned size)
+{
+ struct nouveau_pushbuf *push = nv->pushbuf;
+ struct nouveau_bufctx *bctx = nv50_context(&nv->pipe)->bufctx;
+
+ nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD);
+ nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
+ BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 1);
+ PUSH_DATA (push, 1);
+
+ while (size) {
+ unsigned bytes = MIN2(size, 1 << 17);
+
+ BEGIN_NV04(push, NV50_M2MF(OFFSET_IN_HIGH), 2);
+ PUSH_DATAh(push, src->offset + srcoff);
+ PUSH_DATAh(push, dst->offset + dstoff);
+ BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_OFFSET_IN), 2);
+ PUSH_DATA (push, src->offset + srcoff);
+ PUSH_DATA (push, dst->offset + dstoff);
+ BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_LINE_LENGTH_IN), 4);
+ PUSH_DATA (push, bytes);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, (1 << 8) | (1 << 0));
+ PUSH_DATA (push, 0);
+
+ srcoff += bytes;
+ dstoff += bytes;
+ size -= bytes;
+ }
+
+ nouveau_bufctx_reset(bctx, 0);
+}
+
+void *
+nv50_miptree_transfer_map(struct pipe_context *pctx,
+ struct pipe_resource *res,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **ptransfer)
+{
+ struct nv50_screen *screen = nv50_screen(pctx->screen);
+ struct nv50_context *nv50 = nv50_context(pctx);
+ struct nouveau_device *dev = nv50->screen->base.device;
+ const struct nv50_miptree *mt = nv50_miptree(res);
+ struct nv50_transfer *tx;
+ uint32_t size;
+ int ret;
+ unsigned flags = 0;
+
+ if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+ return NULL;
+
+ tx = CALLOC_STRUCT(nv50_transfer);
+ if (!tx)
+ return NULL;
+
+ pipe_resource_reference(&tx->base.resource, res);
+
+ tx->base.level = level;
+ tx->base.usage = usage;
+ tx->base.box = *box;
+
+ if (util_format_is_plain(res->format)) {
+ tx->nblocksx = box->width << mt->ms_x;
+ tx->nblocksy = box->height << mt->ms_x;
+ } else {
+ tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
+ tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
+ }
+
+ tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
+ tx->base.layer_stride = tx->nblocksy * tx->base.stride;
+
+ nv50_m2mf_rect_setup(&tx->rect[0], res, level, box->x, box->y, box->z);
+
+ size = tx->base.layer_stride;
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
+ size * tx->base.box.depth, NULL, &tx->rect[1].bo);
+ if (ret) {
+ FREE(tx);
+ return NULL;
+ }
+
+ tx->rect[1].cpp = tx->rect[0].cpp;
+ tx->rect[1].width = tx->nblocksx;
+ tx->rect[1].height = tx->nblocksy;
+ tx->rect[1].depth = 1;
+ tx->rect[1].pitch = tx->base.stride;
+ tx->rect[1].domain = NOUVEAU_BO_GART;
+
+ if (usage & PIPE_TRANSFER_READ) {
+ unsigned base = tx->rect[0].base;
+ unsigned z = tx->rect[0].z;
+ unsigned i;
+ for (i = 0; i < box->depth; ++i) {
+ nv50_m2mf_transfer_rect(nv50, &tx->rect[1], &tx->rect[0],
+ tx->nblocksx, tx->nblocksy);
+ if (mt->layout_3d)
+ tx->rect[0].z++;
+ else
+ tx->rect[0].base += mt->layer_stride;
+ tx->rect[1].base += size;
+ }
+ tx->rect[0].z = z;
+ tx->rect[0].base = base;
+ tx->rect[1].base = 0;
+ }
+
+ if (tx->rect[1].bo->map) {
+ *ptransfer = &tx->base;
+ return tx->rect[1].bo->map;
+ }
+
+ if (usage & PIPE_TRANSFER_READ)
+ flags = NOUVEAU_BO_RD;
+ if (usage & PIPE_TRANSFER_WRITE)
+ flags |= NOUVEAU_BO_WR;
+
+ ret = nouveau_bo_map(tx->rect[1].bo, flags, screen->base.client);
+ if (ret) {
+ nouveau_bo_ref(NULL, &tx->rect[1].bo);
+ FREE(tx);
+ return NULL;
+ }
+
+ *ptransfer = &tx->base;
+ return tx->rect[1].bo->map;
+}
+
+void
+nv50_miptree_transfer_unmap(struct pipe_context *pctx,
+ struct pipe_transfer *transfer)
+{
+ struct nv50_context *nv50 = nv50_context(pctx);
+ struct nv50_transfer *tx = (struct nv50_transfer *)transfer;
+ struct nv50_miptree *mt = nv50_miptree(tx->base.resource);
+ unsigned i;
+
+ if (tx->base.usage & PIPE_TRANSFER_WRITE) {
+ for (i = 0; i < tx->base.box.depth; ++i) {
+ nv50_m2mf_transfer_rect(nv50, &tx->rect[0], &tx->rect[1],
+ tx->nblocksx, tx->nblocksy);
+ if (mt->layout_3d)
+ tx->rect[0].z++;
+ else
+ tx->rect[0].base += mt->layer_stride;
+ tx->rect[1].base += tx->nblocksy * tx->base.stride;
+ }
+ }
+
+ nouveau_bo_ref(NULL, &tx->rect[1].bo);
+ pipe_resource_reference(&transfer->resource, NULL);
+
+ FREE(tx);
+}
+
+void
+nv50_cb_push(struct nouveau_context *nv,
+ struct nouveau_bo *bo, unsigned domain,
+ unsigned base, unsigned size,
+ unsigned offset, unsigned words, const uint32_t *data)
+{
+ struct nouveau_pushbuf *push = nv->pushbuf;
+ struct nouveau_bufctx *bctx = nv50_context(&nv->pipe)->bufctx;
+
+ assert(!(offset & 3));
+ size = align(size, 0x100);
+
+ nouveau_bufctx_refn(bctx, 0, bo, NOUVEAU_BO_WR | domain);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
+ while (words) {
+ unsigned nr;
+
+ nr = PUSH_AVAIL(push);
+ nr = MIN2(nr - 7, words);
+ nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
+
+ BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, bo->offset + base);
+ PUSH_DATA (push, bo->offset + base);
+ PUSH_DATA (push, (NV50_CB_TMP << 16) | (size & 0xffff));
+ BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
+ PUSH_DATA (push, (offset << 6) | NV50_CB_TMP);
+ BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
+ PUSH_DATAp(push, data, nr);
+
+ words -= nr;
+ data += nr;
+ offset += nr * 4;
+ }
+
+ nouveau_bufctx_reset(bctx, 0);
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.h b/src/gallium/drivers/nouveau/nv50/nv50_transfer.h
new file mode 100644
index 00000000000..c58cb0008df
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.h
@@ -0,0 +1,27 @@
+
+#ifndef __NV50_TRANSFER_H__
+#define __NV50_TRANSFER_H__
+
+#include "pipe/p_state.h"
+
+struct nv50_m2mf_rect {
+ struct nouveau_bo *bo;
+ uint32_t base;
+ unsigned domain;
+ uint32_t pitch;
+ uint32_t width;
+ uint32_t x;
+ uint32_t height;
+ uint32_t y;
+ uint16_t depth;
+ uint16_t z;
+ uint16_t tile_mode;
+ uint16_t cpp;
+};
+
+void
+nv50_m2mf_rect_setup(struct nv50_m2mf_rect *rect,
+ struct pipe_resource *restrict res, unsigned l,
+ unsigned x, unsigned y, unsigned z);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
new file mode 100644
index 00000000000..c6162b5f415
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -0,0 +1,820 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_resource.h"
+
+#include "nv50/nv50_3d.xml.h"
+
+void
+nv50_vertex_state_delete(struct pipe_context *pipe,
+ void *hwcso)
+{
+ struct nv50_vertex_stateobj *so = hwcso;
+
+ if (so->translate)
+ so->translate->release(so->translate);
+ FREE(hwcso);
+}
+
+void *
+nv50_vertex_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct nv50_vertex_stateobj *so;
+ struct translate_key transkey;
+ unsigned i;
+
+ so = MALLOC(sizeof(*so) +
+ num_elements * sizeof(struct nv50_vertex_element));
+ if (!so)
+ return NULL;
+ so->num_elements = num_elements;
+ so->instance_elts = 0;
+ so->instance_bufs = 0;
+ so->need_conversion = FALSE;
+
+ memset(so->vb_access_size, 0, sizeof(so->vb_access_size));
+
+ for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
+ so->min_instance_div[i] = 0xffffffff;
+
+ transkey.nr_elements = 0;
+ transkey.output_stride = 0;
+
+ for (i = 0; i < num_elements; ++i) {
+ const struct pipe_vertex_element *ve = &elements[i];
+ const unsigned vbi = ve->vertex_buffer_index;
+ unsigned size;
+ enum pipe_format fmt = ve->src_format;
+
+ so->element[i].pipe = elements[i];
+ so->element[i].state = nv50_format_table[fmt].vtx;
+
+ if (!so->element[i].state) {
+ switch (util_format_get_nr_components(fmt)) {
+ case 1: fmt = PIPE_FORMAT_R32_FLOAT; break;
+ case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;
+ case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;
+ case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
+ default:
+ assert(0);
+ FREE(so);
+ return NULL;
+ }
+ so->element[i].state = nv50_format_table[fmt].vtx;
+ so->need_conversion = TRUE;
+ }
+ so->element[i].state |= i;
+
+ size = util_format_get_blocksize(fmt);
+ if (so->vb_access_size[vbi] < (ve->src_offset + size))
+ so->vb_access_size[vbi] = ve->src_offset + size;
+
+ if (1) {
+ unsigned j = transkey.nr_elements++;
+
+ transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;
+ transkey.element[j].input_format = ve->src_format;
+ transkey.element[j].input_buffer = vbi;
+ transkey.element[j].input_offset = ve->src_offset;
+ transkey.element[j].instance_divisor = ve->instance_divisor;
+
+ transkey.element[j].output_format = fmt;
+ transkey.element[j].output_offset = transkey.output_stride;
+ transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3;
+
+ if (unlikely(ve->instance_divisor)) {
+ so->instance_elts |= 1 << i;
+ so->instance_bufs |= 1 << vbi;
+ if (ve->instance_divisor < so->min_instance_div[vbi])
+ so->min_instance_div[vbi] = ve->instance_divisor;
+ }
+ }
+ }
+
+ so->translate = translate_create(&transkey);
+ so->vertex_size = transkey.output_stride / 4;
+ so->packet_vertex_limit = NV04_PFIFO_MAX_PACKET_LEN /
+ MAX2(so->vertex_size, 1);
+
+ return so;
+}
+
+#define NV50_3D_VERTEX_ATTRIB_INACTIVE \
+ NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT | \
+ NV50_3D_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 | \
+ NV50_3D_VERTEX_ARRAY_ATTRIB_CONST
+
+static void
+nv50_emit_vtxattr(struct nv50_context *nv50, struct pipe_vertex_buffer *vb,
+ struct pipe_vertex_element *ve, unsigned attr)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ const void *data = (const uint8_t *)vb->user_buffer + ve->src_offset;
+ float v[4];
+ const unsigned nc = util_format_get_nr_components(ve->src_format);
+ const struct util_format_description *desc =
+ util_format_description(ve->src_format);
+
+ assert(vb->user_buffer);
+
+ if (desc->channel[0].pure_integer) {
+ if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+ desc->unpack_rgba_sint((int32_t *)v, 0, data, 0, 1, 1);
+ } else {
+ desc->unpack_rgba_uint((uint32_t *)v, 0, data, 0, 1, 1);
+ }
+ } else {
+ desc->unpack_rgba_float(v, 0, data, 0, 1, 1);
+ }
+
+ switch (nc) {
+ case 4:
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_4F_X(attr)), 4);
+ PUSH_DATAf(push, v[0]);
+ PUSH_DATAf(push, v[1]);
+ PUSH_DATAf(push, v[2]);
+ PUSH_DATAf(push, v[3]);
+ break;
+ case 3:
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(attr)), 3);
+ PUSH_DATAf(push, v[0]);
+ PUSH_DATAf(push, v[1]);
+ PUSH_DATAf(push, v[2]);
+ break;
+ case 2:
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(attr)), 2);
+ PUSH_DATAf(push, v[0]);
+ PUSH_DATAf(push, v[1]);
+ break;
+ case 1:
+ if (attr == nv50->vertprog->vp.edgeflag) {
+ BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);
+ PUSH_DATA (push, v[0] ? 1 : 0);
+ }
+ BEGIN_NV04(push, NV50_3D(VTX_ATTR_1F(attr)), 1);
+ PUSH_DATAf(push, v[0]);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+static INLINE void
+nv50_user_vbuf_range(struct nv50_context *nv50, int vbi,
+ uint32_t *base, uint32_t *size)
+{
+ if (unlikely(nv50->vertex->instance_bufs & (1 << vbi))) {
+ /* TODO: use min and max instance divisor to get a proper range */
+ *base = 0;
+ *size = nv50->vtxbuf[vbi].buffer->width0;
+ } else {
+ /* NOTE: if there are user buffers, we *must* have index bounds */
+ assert(nv50->vb_elt_limit != ~0);
+ *base = nv50->vb_elt_first * nv50->vtxbuf[vbi].stride;
+ *size = nv50->vb_elt_limit * nv50->vtxbuf[vbi].stride +
+ nv50->vertex->vb_access_size[vbi];
+ }
+}
+
+static void
+nv50_upload_user_buffers(struct nv50_context *nv50,
+ uint64_t addrs[], uint32_t limits[])
+{
+ unsigned b;
+
+ for (b = 0; b < nv50->num_vtxbufs; ++b) {
+ struct nouveau_bo *bo;
+ const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b];
+ uint32_t base, size;
+
+ if (!(nv50->vbo_user & (1 << b)) || !vb->stride)
+ continue;
+ nv50_user_vbuf_range(nv50, b, &base, &size);
+
+ limits[b] = base + size - 1;
+ addrs[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer, base, size,
+ &bo);
+ if (addrs[b])
+ BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, NOUVEAU_BO_GART |
+ NOUVEAU_BO_RD, bo);
+ }
+ nv50->base.vbo_dirty = TRUE;
+}
+
+static void
+nv50_update_user_vbufs(struct nv50_context *nv50)
+{
+ uint64_t address[PIPE_MAX_ATTRIBS];
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ unsigned i;
+ uint32_t written = 0;
+
+ for (i = 0; i < nv50->vertex->num_elements; ++i) {
+ struct pipe_vertex_element *ve = &nv50->vertex->element[i].pipe;
+ const unsigned b = ve->vertex_buffer_index;
+ struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b];
+ uint32_t base, size;
+
+ if (!(nv50->vbo_user & (1 << b)))
+ continue;
+
+ if (!vb->stride) {
+ nv50_emit_vtxattr(nv50, vb, ve, i);
+ continue;
+ }
+ nv50_user_vbuf_range(nv50, b, &base, &size);
+
+ if (!(written & (1 << b))) {
+ struct nouveau_bo *bo;
+ const uint32_t bo_flags = NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+ written |= 1 << b;
+ address[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer,
+ base, size, &bo);
+ if (address[b])
+ BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, bo_flags, bo);
+ }
+
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+ PUSH_DATAh(push, address[b] + base + size - 1);
+ PUSH_DATA (push, address[b] + base + size - 1);
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_START_HIGH(i)), 2);
+ PUSH_DATAh(push, address[b] + ve->src_offset);
+ PUSH_DATA (push, address[b] + ve->src_offset);
+ }
+ nv50->base.vbo_dirty = TRUE;
+}
+
+static INLINE void
+nv50_release_user_vbufs(struct nv50_context *nv50)
+{
+ if (nv50->vbo_user) {
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX_TMP);
+ nouveau_scratch_done(&nv50->base);
+ }
+}
+
+void
+nv50_vertex_arrays_validate(struct nv50_context *nv50)
+{
+ uint64_t addrs[PIPE_MAX_ATTRIBS];
+ uint32_t limits[PIPE_MAX_ATTRIBS];
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_vertex_stateobj *vertex = nv50->vertex;
+ struct pipe_vertex_buffer *vb;
+ struct nv50_vertex_element *ve;
+ uint32_t mask;
+ uint32_t refd = 0;
+ unsigned i;
+ const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);
+
+ if (unlikely(vertex->need_conversion))
+ nv50->vbo_fifo = ~0;
+ else
+ if (nv50->vbo_user & ~nv50->vbo_constant)
+ nv50->vbo_fifo = nv50->vbo_push_hint ? ~0 : 0;
+ else
+ nv50->vbo_fifo = 0;
+
+ if (!nv50->vbo_fifo) {
+ /* if vertex buffer was written by GPU - flush VBO cache */
+ for (i = 0; i < nv50->num_vtxbufs; ++i) {
+ struct nv04_resource *buf = nv04_resource(nv50->vtxbuf[i].buffer);
+ if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ nv50->base.vbo_dirty = TRUE;
+ break;
+ }
+ }
+ }
+
+ /* update vertex format state */
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_ATTRIB(0)), n);
+ if (nv50->vbo_fifo) {
+ nv50->state.num_vtxelts = vertex->num_elements;
+ for (i = 0; i < vertex->num_elements; ++i)
+ PUSH_DATA (push, vertex->element[i].state);
+ for (; i < n; ++i)
+ PUSH_DATA (push, NV50_3D_VERTEX_ATTRIB_INACTIVE);
+ for (i = 0; i < n; ++i) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ PUSH_DATA (push, 0);
+ }
+ return;
+ }
+ for (i = 0; i < vertex->num_elements; ++i) {
+ const unsigned b = vertex->element[i].pipe.vertex_buffer_index;
+ ve = &vertex->element[i];
+ vb = &nv50->vtxbuf[b];
+
+ if (likely(vb->stride) || !(nv50->vbo_user & (1 << b)))
+ PUSH_DATA(push, ve->state);
+ else
+ PUSH_DATA(push, ve->state | NV50_3D_VERTEX_ARRAY_ATTRIB_CONST);
+ }
+ for (; i < n; ++i)
+ PUSH_DATA(push, NV50_3D_VERTEX_ATTRIB_INACTIVE);
+
+ /* update per-instance enables */
+ mask = vertex->instance_elts ^ nv50->state.instance_elts;
+ while (mask) {
+ const int i = ffs(mask) - 1;
+ mask &= ~(1 << i);
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_PER_INSTANCE(i)), 1);
+ PUSH_DATA (push, (vertex->instance_elts >> i) & 1);
+ }
+ nv50->state.instance_elts = vertex->instance_elts;
+
+ if (nv50->vbo_user & ~nv50->vbo_constant)
+ nv50_upload_user_buffers(nv50, addrs, limits);
+
+ /* update buffers and set constant attributes */
+ for (i = 0; i < vertex->num_elements; ++i) {
+ uint64_t address, limit;
+ const unsigned b = vertex->element[i].pipe.vertex_buffer_index;
+ ve = &vertex->element[i];
+ vb = &nv50->vtxbuf[b];
+
+ if (unlikely(nv50->vbo_constant & (1 << b))) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ PUSH_DATA (push, 0);
+ nv50_emit_vtxattr(nv50, vb, &ve->pipe, i);
+ continue;
+ } else
+ if (nv50->vbo_user & (1 << b)) {
+ address = addrs[b] + ve->pipe.src_offset;
+ limit = addrs[b] + limits[b];
+ } else {
+ struct nv04_resource *buf = nv04_resource(vb->buffer);
+ if (!(refd & (1 << b))) {
+ refd |= 1 << b;
+ BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD);
+ }
+ address = buf->address + vb->buffer_offset + ve->pipe.src_offset;
+ limit = buf->address + buf->base.width0 - 1;
+ }
+
+ if (unlikely(ve->pipe.instance_divisor)) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 4);
+ PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
+ PUSH_DATA (push, ve->pipe.instance_divisor);
+ } else {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 3);
+ PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
+ }
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+ PUSH_DATAh(push, limit);
+ PUSH_DATA (push, limit);
+ }
+ for (; i < nv50->state.num_vtxelts; ++i) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ PUSH_DATA (push, 0);
+ }
+ nv50->state.num_vtxelts = vertex->num_elements;
+}
+
+#define NV50_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nv50_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NV50_PRIM_GL_CASE(POINTS);
+ NV50_PRIM_GL_CASE(LINES);
+ NV50_PRIM_GL_CASE(LINE_LOOP);
+ NV50_PRIM_GL_CASE(LINE_STRIP);
+ NV50_PRIM_GL_CASE(TRIANGLES);
+ NV50_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NV50_PRIM_GL_CASE(TRIANGLE_FAN);
+ NV50_PRIM_GL_CASE(QUADS);
+ NV50_PRIM_GL_CASE(QUAD_STRIP);
+ NV50_PRIM_GL_CASE(POLYGON);
+ NV50_PRIM_GL_CASE(LINES_ADJACENCY);
+ NV50_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NV50_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NV50_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ default:
+ return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ break;
+ }
+}
+
+/* For pre-nva0 transform feedback. */
+static const uint8_t nv50_pipe_prim_to_prim_size[PIPE_PRIM_MAX + 1] =
+{
+ [PIPE_PRIM_POINTS] = 1,
+ [PIPE_PRIM_LINES] = 2,
+ [PIPE_PRIM_LINE_LOOP] = 2,
+ [PIPE_PRIM_LINE_STRIP] = 2,
+ [PIPE_PRIM_TRIANGLES] = 3,
+ [PIPE_PRIM_TRIANGLE_STRIP] = 3,
+ [PIPE_PRIM_TRIANGLE_FAN] = 3,
+ [PIPE_PRIM_QUADS] = 3,
+ [PIPE_PRIM_QUAD_STRIP] = 3,
+ [PIPE_PRIM_POLYGON] = 3,
+ [PIPE_PRIM_LINES_ADJACENCY] = 2,
+ [PIPE_PRIM_LINE_STRIP_ADJACENCY] = 2,
+ [PIPE_PRIM_TRIANGLES_ADJACENCY] = 3,
+ [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = 3
+};
+
+static void
+nv50_draw_arrays(struct nv50_context *nv50,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned instance_count)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ unsigned prim;
+
+ if (nv50->state.index_bias) {
+ BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
+ PUSH_DATA (push, 0);
+ nv50->state.index_bias = 0;
+ }
+
+ prim = nv50_prim_gl(mode);
+
+ while (instance_count--) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, prim);
+ BEGIN_NV04(push, NV50_3D(VERTEX_BUFFER_FIRST), 2);
+ PUSH_DATA (push, start);
+ PUSH_DATA (push, count);
+ BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
+ PUSH_DATA (push, 0);
+
+ prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+}
+
+static void
+nv50_draw_elements_inline_u08(struct nouveau_pushbuf *push, const uint8_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 3) {
+ unsigned i;
+ BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U32), count & 3);
+ for (i = 0; i < (count & 3); ++i)
+ PUSH_DATA(push, *map++);
+ count &= ~3;
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;
+
+ BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U8), nr);
+ for (i = 0; i < nr; ++i) {
+ PUSH_DATA(push,
+ (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]);
+ map += 4;
+ }
+ count -= nr * 4;
+ }
+}
+
+static void
+nv50_draw_elements_inline_u16(struct nouveau_pushbuf *push, const uint16_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 1) {
+ count &= ~1;
+ BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, *map++);
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+ BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U16), nr);
+ for (i = 0; i < nr; ++i) {
+ PUSH_DATA(push, (map[1] << 16) | map[0]);
+ map += 2;
+ }
+ count -= nr * 2;
+ }
+}
+
+static void
+nv50_draw_elements_inline_u32(struct nouveau_pushbuf *push, const uint32_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ while (count) {
+ const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
+
+ BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U32), nr);
+ PUSH_DATAp(push, map, nr);
+
+ map += nr;
+ count -= nr;
+ }
+}
+
+static void
+nv50_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
+ const uint32_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 1) {
+ count--;
+ BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, *map++);
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+ BEGIN_NI04(push, NV50_3D(VB_ELEMENT_U16), nr);
+ for (i = 0; i < nr; ++i) {
+ PUSH_DATA(push, (map[1] << 16) | map[0]);
+ map += 2;
+ }
+ count -= nr * 2;
+ }
+}
+
+static void
+nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned instance_count, int32_t index_bias)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ unsigned prim;
+ const unsigned index_size = nv50->idxbuf.index_size;
+
+ prim = nv50_prim_gl(mode);
+
+ if (index_bias != nv50->state.index_bias) {
+ BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
+ PUSH_DATA (push, index_bias);
+ nv50->state.index_bias = index_bias;
+ }
+
+ if (nv50->idxbuf.buffer) {
+ struct nv04_resource *buf = nv04_resource(nv50->idxbuf.buffer);
+ unsigned pb_start;
+ unsigned pb_bytes;
+ const unsigned base = (buf->offset + nv50->idxbuf.offset) & ~3;
+
+ start += ((buf->offset + nv50->idxbuf.offset) & 3) >> (index_size >> 1);
+
+ assert(nouveau_resource_mapped_by_gpu(nv50->idxbuf.buffer));
+
+ while (instance_count--) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, prim);
+
+ nouveau_pushbuf_space(push, 8, 0, 1);
+
+ switch (index_size) {
+ case 4:
+ BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U32), count);
+ nouveau_pushbuf_data(push, buf->bo, base + start * 4, count * 4);
+ break;
+ case 2:
+ pb_start = (start & ~1) * 2;
+ pb_bytes = ((start + count + 1) & ~1) * 2 - pb_start;
+
+ BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U16_SETUP), 1);
+ PUSH_DATA (push, (start << 31) | count);
+ BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U16), pb_bytes / 4);
+ nouveau_pushbuf_data(push, buf->bo, base + pb_start, pb_bytes);
+ BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U16_SETUP), 1);
+ PUSH_DATA (push, 0);
+ break;
+ default:
+ assert(index_size == 1);
+ pb_start = start & ~3;
+ pb_bytes = ((start + count + 3) & ~3) - pb_start;
+
+ BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U8_SETUP), 1);
+ PUSH_DATA (push, (start << 30) | count);
+ BEGIN_NL50(push, NV50_3D(VB_ELEMENT_U8), pb_bytes / 4);
+ nouveau_pushbuf_data(push, buf->bo, base + pb_start, pb_bytes);
+ BEGIN_NV04(push, NV50_3D(VB_ELEMENT_U8_SETUP), 1);
+ PUSH_DATA (push, 0);
+ break;
+ }
+ BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
+ PUSH_DATA (push, 0);
+
+ prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+ } else {
+ const void *data = nv50->idxbuf.user_buffer;
+
+ while (instance_count--) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, prim);
+ switch (index_size) {
+ case 1:
+ nv50_draw_elements_inline_u08(push, data, start, count);
+ break;
+ case 2:
+ nv50_draw_elements_inline_u16(push, data, start, count);
+ break;
+ case 4:
+ if (shorten)
+ nv50_draw_elements_inline_u32_short(push, data, start, count);
+ else
+ nv50_draw_elements_inline_u32(push, data, start, count);
+ break;
+ default:
+ assert(0);
+ return;
+ }
+ BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
+ PUSH_DATA (push, 0);
+
+ prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+ }
+}
+
+static void
+nva0_draw_stream_output(struct nv50_context *nv50,
+ const struct pipe_draw_info *info)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_so_target *so = nv50_so_target(info->count_from_stream_output);
+ struct nv04_resource *res = nv04_resource(so->pipe.buffer);
+ unsigned num_instances = info->instance_count;
+ unsigned mode = nv50_prim_gl(info->mode);
+
+ if (unlikely(nv50->screen->base.class_3d < NVA0_3D_CLASS)) {
+ /* A proper implementation without waiting doesn't seem possible,
+ * so don't bother.
+ */
+ NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
+ return;
+ }
+
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ PUSH_SPACE(push, 4);
+ BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ assert(num_instances);
+ do {
+ PUSH_SPACE(push, 8);
+ BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, mode);
+ BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
+ nv50_query_pushbuf_submit(push, so->pq, 0x4);
+ BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
+ PUSH_DATA (push, 0);
+
+ mode |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ } while (--num_instances);
+}
+
+static void
+nv50_draw_vbo_kick_notify(struct nouveau_pushbuf *chan)
+{
+ struct nv50_screen *screen = chan->user_priv;
+
+ nouveau_fence_update(&screen->base, TRUE);
+
+ nv50_bufctx_fence(screen->cur_ctx->bufctx_3d, TRUE);
+}
+
+void
+nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
+ nv50->vb_elt_first = info->min_index + info->index_bias;
+ nv50->vb_elt_limit = info->max_index - info->min_index;
+ nv50->instance_off = info->start_instance;
+ nv50->instance_max = info->instance_count - 1;
+
+ /* For picking only a few vertices from a large user buffer, push is better,
+ * if index count is larger and we expect repeated vertices, suggest upload.
+ */
+ nv50->vbo_push_hint = /* the 64 is heuristic */
+ !(info->indexed && ((nv50->vb_elt_limit + 64) < info->count));
+
+ if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_ARRAYS | NV50_NEW_VERTEX))) {
+ if (!!nv50->vbo_fifo != nv50->vbo_push_hint)
+ nv50->dirty |= NV50_NEW_ARRAYS;
+ else
+ if (!nv50->vbo_fifo)
+ nv50_update_user_vbufs(nv50);
+ }
+
+ if (unlikely(nv50->num_so_targets && !nv50->gmtyprog))
+ nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode];
+
+ nv50_state_validate(nv50, ~0, 8); /* 8 as minimum, we use flush_notify */
+
+ push->kick_notify = nv50_draw_vbo_kick_notify;
+
+ if (nv50->vbo_fifo) {
+ nv50_push_vbo(nv50, info);
+ push->kick_notify = nv50_default_kick_notify;
+ nouveau_pushbuf_bufctx(push, NULL);
+ return;
+ }
+
+ if (nv50->state.instance_base != info->start_instance) {
+ nv50->state.instance_base = info->start_instance;
+ /* NOTE: this does not affect the shader input, should it ? */
+ BEGIN_NV04(push, NV50_3D(VB_INSTANCE_BASE), 1);
+ PUSH_DATA (push, info->start_instance);
+ }
+
+ if (nv50->base.vbo_dirty) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
+ PUSH_DATA (push, 0);
+ nv50->base.vbo_dirty = FALSE;
+ }
+
+ if (info->indexed) {
+ boolean shorten = info->max_index <= 65535;
+
+ if (info->primitive_restart != nv50->state.prim_restart) {
+ if (info->primitive_restart) {
+ BEGIN_NV04(push, NV50_3D(PRIM_RESTART_ENABLE), 2);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
+ } else {
+ BEGIN_NV04(push, NV50_3D(PRIM_RESTART_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ }
+ nv50->state.prim_restart = info->primitive_restart;
+ } else
+ if (info->primitive_restart) {
+ BEGIN_NV04(push, NV50_3D(PRIM_RESTART_INDEX), 1);
+ PUSH_DATA (push, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
+ }
+
+ nv50_draw_elements(nv50, shorten,
+ info->mode, info->start, info->count,
+ info->instance_count, info->index_bias);
+ } else
+ if (unlikely(info->count_from_stream_output)) {
+ nva0_draw_stream_output(nv50, info);
+ } else {
+ nv50_draw_arrays(nv50,
+ info->mode, info->start, info->count,
+ info->instance_count);
+ }
+ push->kick_notify = nv50_default_kick_notify;
+
+ nv50_release_user_vbufs(nv50);
+
+ nouveau_pushbuf_bufctx(push, NULL);
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
new file mode 100644
index 00000000000..e8578c8be6f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
@@ -0,0 +1,125 @@
+
+#ifndef __NV50_WINSYS_H__
+#define __NV50_WINSYS_H__
+
+#include <stdint.h>
+#include <unistd.h>
+
+#include "pipe/p_defines.h"
+
+#include "nouveau_winsys.h"
+#include "nouveau_buffer.h"
+
+
+#ifndef NV04_PFIFO_MAX_PACKET_LEN
+#define NV04_PFIFO_MAX_PACKET_LEN 2047
+#endif
+
+
+static INLINE void
+nv50_add_bufctx_resident_bo(struct nouveau_bufctx *bufctx, int bin,
+ unsigned flags, struct nouveau_bo *bo)
+{
+ nouveau_bufctx_refn(bufctx, bin, bo, flags)->priv = NULL;
+}
+
+static INLINE void
+nv50_add_bufctx_resident(struct nouveau_bufctx *bufctx, int bin,
+ struct nv04_resource *res, unsigned flags)
+{
+ struct nouveau_bufref *ref =
+ nouveau_bufctx_refn(bufctx, bin, res->bo, flags | res->domain);
+ ref->priv = res;
+ ref->priv_data = flags;
+}
+
+#define BCTX_REFN_bo(ctx, bin, fl, bo) \
+ nv50_add_bufctx_resident_bo(ctx, NV50_BIND_##bin, fl, bo);
+
+#define BCTX_REFN(bctx, bin, res, acc) \
+ nv50_add_bufctx_resident(bctx, NV50_BIND_##bin, res, NOUVEAU_BO_##acc)
+
+static INLINE void
+PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
+{
+ struct nouveau_pushbuf_refn ref = { bo, flags };
+ nouveau_pushbuf_refn(push, &ref, 1);
+}
+
+
+#define SUBC_3D(m) 3, (m)
+#define NV50_3D(n) SUBC_3D(NV50_3D_##n)
+#define NVA0_3D(n) SUBC_3D(NVA0_3D_##n)
+
+#define SUBC_2D(m) 4, (m)
+#define NV50_2D(n) SUBC_2D(NV50_2D_##n)
+
+#define SUBC_M2MF(m) 5, (m)
+#define NV50_M2MF(n) SUBC_M2MF(NV50_M2MF_##n)
+
+#define SUBC_COMPUTE(m) 6, (m)
+#define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n)
+
+
+static INLINE uint32_t
+NV50_FIFO_PKHDR(int subc, int mthd, unsigned size)
+{
+ return 0x00000000 | (size << 18) | (subc << 13) | mthd;
+}
+
+static INLINE uint32_t
+NV50_FIFO_PKHDR_NI(int subc, int mthd, unsigned size)
+{
+ return 0x40000000 | (size << 18) | (subc << 13) | mthd;
+}
+
+static INLINE uint32_t
+NV50_FIFO_PKHDR_L(int subc, int mthd)
+{
+ return 0x00030000 | (subc << 13) | mthd;
+}
+
+
+static INLINE uint32_t
+nouveau_bo_memtype(const struct nouveau_bo *bo)
+{
+ return bo->config.nv50.memtype;
+}
+
+
+static INLINE void
+PUSH_DATAh(struct nouveau_pushbuf *push, uint64_t data)
+{
+ *push->cur++ = (uint32_t)(data >> 32);
+}
+
+static INLINE void
+BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
+{
+#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
+ PUSH_SPACE(push, size + 1);
+#endif
+ PUSH_DATA (push, NV50_FIFO_PKHDR(subc, mthd, size));
+}
+
+static INLINE void
+BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
+{
+#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
+ PUSH_SPACE(push, size + 1);
+#endif
+ PUSH_DATA (push, NV50_FIFO_PKHDR_NI(subc, mthd, size));
+}
+
+/* long, non-incremental, nv50-only */
+static INLINE void
+BEGIN_NL50(struct nouveau_pushbuf *push, int subc, int mthd, uint32_t size)
+{
+#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
+ PUSH_SPACE(push, 2);
+#endif
+ PUSH_DATA (push, NV50_FIFO_PKHDR_L(subc, mthd));
+ PUSH_DATA (push, size);
+}
+
+#endif /* __NV50_WINSYS_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video.c b/src/gallium/drivers/nouveau/nv50/nv84_video.c
new file mode 100644
index 00000000000..3fee6d95f66
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv84_video.c
@@ -0,0 +1,797 @@
+/*
+ * Copyright 2013 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+#include "util/u_format.h"
+#include "util/u_sampler.h"
+#include "vl/vl_zscan.h"
+
+#include "nv50/nv84_video.h"
+
+static int
+nv84_copy_firmware(const char *path, void *dest, ssize_t len)
+{
+ int fd = open(path, O_RDONLY | O_CLOEXEC);
+ ssize_t r;
+ if (fd < 0) {
+ fprintf(stderr, "opening firmware file %s failed: %m\n", path);
+ return 1;
+ }
+ r = read(fd, dest, len);
+ close(fd);
+
+ if (r != len) {
+ fprintf(stderr, "reading firwmare file %s failed: %m\n", path);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+filesize(const char *path)
+{
+ int ret;
+ struct stat statbuf;
+
+ ret = stat(path, &statbuf);
+ if (ret)
+ return ret;
+ return statbuf.st_size;
+}
+
+static struct nouveau_bo *
+nv84_load_firmwares(struct nouveau_device *dev, struct nv84_decoder *dec,
+ const char *fw1, const char *fw2)
+{
+ int ret, size1, size2 = 0;
+ struct nouveau_bo *fw;
+
+ size1 = filesize(fw1);
+ if (fw2)
+ size2 = filesize(fw2);
+ if (size1 < 0 || size2 < 0)
+ return NULL;
+
+ dec->vp_fw2_offset = align(size1, 0x100);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, dec->vp_fw2_offset + size2, NULL, &fw);
+ if (ret)
+ return NULL;
+ ret = nouveau_bo_map(fw, NOUVEAU_BO_WR, dec->client);
+ if (ret)
+ goto error;
+
+ ret = nv84_copy_firmware(fw1, fw->map, size1);
+ if (fw2 && !ret)
+ ret = nv84_copy_firmware(fw2, fw->map + dec->vp_fw2_offset, size2);
+ munmap(fw->map, fw->size);
+ fw->map = NULL;
+ if (!ret)
+ return fw;
+error:
+ nouveau_bo_ref(NULL, &fw);
+ return NULL;
+}
+
+static struct nouveau_bo *
+nv84_load_bsp_firmware(struct nouveau_device *dev, struct nv84_decoder *dec)
+{
+ return nv84_load_firmwares(
+ dev, dec, "/lib/firmware/nouveau/nv84_bsp-h264", NULL);
+}
+
+static struct nouveau_bo *
+nv84_load_vp_firmware(struct nouveau_device *dev, struct nv84_decoder *dec)
+{
+ return nv84_load_firmwares(
+ dev, dec,
+ "/lib/firmware/nouveau/nv84_vp-h264-1",
+ "/lib/firmware/nouveau/nv84_vp-h264-2");
+}
+
+static struct nouveau_bo *
+nv84_load_vp_firmware_mpeg(struct nouveau_device *dev, struct nv84_decoder *dec)
+{
+ return nv84_load_firmwares(
+ dev, dec, "/lib/firmware/nouveau/nv84_vp-mpeg12", NULL);
+}
+
+static void
+nv84_decoder_decode_bitstream_h264(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *video_target,
+ struct pipe_picture_desc *picture,
+ unsigned num_buffers,
+ const void *const *data,
+ const unsigned *num_bytes)
+{
+ struct nv84_decoder *dec = (struct nv84_decoder *)decoder;
+ struct nv84_video_buffer *target = (struct nv84_video_buffer *)video_target;
+
+ struct pipe_h264_picture_desc *desc = (struct pipe_h264_picture_desc *)picture;
+
+ assert(target->base.buffer_format == PIPE_FORMAT_NV12);
+
+ nv84_decoder_bsp(dec, desc, num_buffers, data, num_bytes, target);
+ nv84_decoder_vp_h264(dec, desc, target);
+}
+
+static void
+nv84_decoder_flush(struct pipe_video_codec *decoder)
+{
+}
+
+static void
+nv84_decoder_begin_frame_h264(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture)
+{
+}
+
+static void
+nv84_decoder_end_frame_h264(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture)
+{
+}
+
+static void
+nv84_decoder_decode_bitstream_mpeg12(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *video_target,
+ struct pipe_picture_desc *picture,
+ unsigned num_buffers,
+ const void *const *data,
+ const unsigned *num_bytes)
+{
+ struct nv84_decoder *dec = (struct nv84_decoder *)decoder;
+
+ assert(video_target->buffer_format == PIPE_FORMAT_NV12);
+
+ vl_mpg12_bs_decode(dec->mpeg12_bs,
+ video_target,
+ (struct pipe_mpeg12_picture_desc *)picture,
+ num_buffers,
+ data,
+ num_bytes);
+}
+
+static void
+nv84_decoder_begin_frame_mpeg12(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture)
+{
+ struct nv84_decoder *dec = (struct nv84_decoder *)decoder;
+ struct pipe_mpeg12_picture_desc *desc = (struct pipe_mpeg12_picture_desc *)picture;
+ int i;
+
+ nouveau_bo_wait(dec->mpeg12_bo, NOUVEAU_BO_RDWR, dec->client);
+ dec->mpeg12_mb_info = dec->mpeg12_bo->map + 0x100;
+ dec->mpeg12_data = dec->mpeg12_bo->map + 0x100 +
+ align(0x20 * mb(dec->base.width) * mb(dec->base.height), 0x100);
+ if (desc->intra_matrix) {
+ dec->zscan = desc->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
+ for (i = 0; i < 64; i++) {
+ dec->mpeg12_intra_matrix[i] = desc->intra_matrix[dec->zscan[i]];
+ dec->mpeg12_non_intra_matrix[i] = desc->non_intra_matrix[dec->zscan[i]];
+ }
+ dec->mpeg12_intra_matrix[0] = 1 << (7 - desc->intra_dc_precision);
+ }
+}
+
+static void
+nv84_decoder_end_frame_mpeg12(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture)
+{
+ nv84_decoder_vp_mpeg12(
+ (struct nv84_decoder *)decoder,
+ (struct pipe_mpeg12_picture_desc *)picture,
+ (struct nv84_video_buffer *)target);
+}
+
+static void
+nv84_decoder_decode_macroblock(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *target,
+ struct pipe_picture_desc *picture,
+ const struct pipe_macroblock *macroblocks,
+ unsigned num_macroblocks)
+{
+ const struct pipe_mpeg12_macroblock *mb = (const struct pipe_mpeg12_macroblock *)macroblocks;
+ for (int i = 0; i < num_macroblocks; i++, mb++) {
+ nv84_decoder_vp_mpeg12_mb(
+ (struct nv84_decoder *)decoder,
+ (struct pipe_mpeg12_picture_desc *)picture,
+ mb);
+ }
+}
+
+static void
+nv84_decoder_destroy(struct pipe_video_codec *decoder)
+{
+ struct nv84_decoder *dec = (struct nv84_decoder *)decoder;
+
+ nouveau_bo_ref(NULL, &dec->bsp_fw);
+ nouveau_bo_ref(NULL, &dec->bsp_data);
+ nouveau_bo_ref(NULL, &dec->vp_fw);
+ nouveau_bo_ref(NULL, &dec->vp_data);
+ nouveau_bo_ref(NULL, &dec->mbring);
+ nouveau_bo_ref(NULL, &dec->vpring);
+ nouveau_bo_ref(NULL, &dec->bitstream);
+ nouveau_bo_ref(NULL, &dec->vp_params);
+ nouveau_bo_ref(NULL, &dec->fence);
+
+ nouveau_object_del(&dec->bsp);
+ nouveau_object_del(&dec->vp);
+
+ nouveau_bufctx_del(&dec->bsp_bufctx);
+ nouveau_pushbuf_del(&dec->bsp_pushbuf);
+ nouveau_object_del(&dec->bsp_channel);
+
+ nouveau_bufctx_del(&dec->vp_bufctx);
+ nouveau_pushbuf_del(&dec->vp_pushbuf);
+ nouveau_object_del(&dec->vp_channel);
+
+ nouveau_client_del(&dec->client);
+
+ if (dec->mpeg12_bs)
+ FREE(dec->mpeg12_bs);
+ FREE(dec);
+}
+
+struct pipe_video_codec *
+nv84_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ)
+{
+ struct nv50_context *nv50 = (struct nv50_context *)context;
+ struct nouveau_screen *screen = &nv50->screen->base;
+ struct nv84_decoder *dec;
+ struct nouveau_pushbuf *bsp_push, *vp_push;
+ struct nv50_surface surf;
+ struct nv50_miptree mip;
+ union pipe_color_union color;
+ struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 };
+ int ret, i;
+ int is_h264 = u_reduce_video_profile(templ->profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC;
+ int is_mpeg12 = u_reduce_video_profile(templ->profile) == PIPE_VIDEO_FORMAT_MPEG12;
+
+ if (getenv("XVMC_VL"))
+ return vl_create_decoder(context, templ);
+
+ if ((is_h264 && templ->entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) ||
+ (is_mpeg12 && templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_IDCT)) {
+ debug_printf("%x\n", templ->entrypoint);
+ return NULL;
+ }
+
+ if (!is_h264 && !is_mpeg12) {
+ debug_printf("invalid profile: %x\n", templ->profile);
+ return NULL;
+ }
+
+ dec = CALLOC_STRUCT(nv84_decoder);
+ if (!dec)
+ return NULL;
+
+ dec->base = *templ;
+ dec->base.context = context;
+ dec->base.destroy = nv84_decoder_destroy;
+ dec->base.flush = nv84_decoder_flush;
+ if (is_h264) {
+ dec->base.decode_bitstream = nv84_decoder_decode_bitstream_h264;
+ dec->base.begin_frame = nv84_decoder_begin_frame_h264;
+ dec->base.end_frame = nv84_decoder_end_frame_h264;
+
+ dec->frame_mbs = mb(dec->base.width) * mb_half(dec->base.height) * 2;
+ dec->frame_size = dec->frame_mbs << 8;
+ dec->vpring_deblock = align(0x30 * dec->frame_mbs, 0x100);
+ dec->vpring_residual = 0x2000 + MAX2(0x32000, 0x600 * dec->frame_mbs);
+ dec->vpring_ctrl = MAX2(0x10000, align(0x1080 + 0x144 * dec->frame_mbs, 0x100));
+ } else if (is_mpeg12) {
+ dec->base.decode_macroblock = nv84_decoder_decode_macroblock;
+ dec->base.begin_frame = nv84_decoder_begin_frame_mpeg12;
+ dec->base.end_frame = nv84_decoder_end_frame_mpeg12;
+
+ if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+ dec->mpeg12_bs = CALLOC_STRUCT(vl_mpg12_bs);
+ if (!dec->mpeg12_bs)
+ goto fail;
+ vl_mpg12_bs_init(dec->mpeg12_bs, &dec->base);
+ dec->base.decode_bitstream = nv84_decoder_decode_bitstream_mpeg12;
+ }
+ } else {
+ goto fail;
+ }
+
+ ret = nouveau_client_new(screen->device, &dec->client);
+ if (ret)
+ goto fail;
+
+ if (is_h264) {
+ ret = nouveau_object_new(&screen->device->object, 0,
+ NOUVEAU_FIFO_CHANNEL_CLASS,
+ &nv04_data, sizeof(nv04_data), &dec->bsp_channel);
+ if (ret)
+ goto fail;
+
+ ret = nouveau_pushbuf_new(dec->client, dec->bsp_channel, 4,
+ 32 * 1024, true, &dec->bsp_pushbuf);
+ if (ret)
+ goto fail;
+
+ ret = nouveau_bufctx_new(dec->client, 1, &dec->bsp_bufctx);
+ if (ret)
+ goto fail;
+ }
+
+ ret = nouveau_object_new(&screen->device->object, 0,
+ NOUVEAU_FIFO_CHANNEL_CLASS,
+ &nv04_data, sizeof(nv04_data), &dec->vp_channel);
+ if (ret)
+ goto fail;
+ ret = nouveau_pushbuf_new(dec->client, dec->vp_channel, 4,
+ 32 * 1024, true, &dec->vp_pushbuf);
+ if (ret)
+ goto fail;
+
+ ret = nouveau_bufctx_new(dec->client, 1, &dec->vp_bufctx);
+ if (ret)
+ goto fail;
+
+ bsp_push = dec->bsp_pushbuf;
+ vp_push = dec->vp_pushbuf;
+
+ if (is_h264) {
+ dec->bsp_fw = nv84_load_bsp_firmware(screen->device, dec);
+ dec->vp_fw = nv84_load_vp_firmware(screen->device, dec);
+ if (!dec->bsp_fw || !dec->vp_fw)
+ goto fail;
+ }
+ if (is_mpeg12) {
+ dec->vp_fw = nv84_load_vp_firmware_mpeg(screen->device, dec);
+ if (!dec->vp_fw)
+ goto fail;
+ }
+
+ if (is_h264) {
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP,
+ 0, 0x40000, NULL, &dec->bsp_data);
+ if (ret)
+ goto fail;
+ }
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP,
+ 0, 0x40000, NULL, &dec->vp_data);
+ if (ret)
+ goto fail;
+ if (is_h264) {
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP,
+ 0,
+ 2 * (dec->vpring_deblock +
+ dec->vpring_residual +
+ dec->vpring_ctrl +
+ 0x1000),
+ NULL, &dec->vpring);
+ if (ret)
+ goto fail;
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP,
+ 0,
+ (templ->max_references + 1) * dec->frame_mbs * 0x40 +
+ dec->frame_size + 0x2000,
+ NULL, &dec->mbring);
+ if (ret)
+ goto fail;
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART,
+ 0, 2 * (0x700 + MAX2(0x40000, 0x800 + 0x180 * dec->frame_mbs)),
+ NULL, &dec->bitstream);
+ if (ret)
+ goto fail;
+ ret = nouveau_bo_map(dec->bitstream, NOUVEAU_BO_WR, dec->client);
+ if (ret)
+ goto fail;
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART,
+ 0, 0x2000, NULL, &dec->vp_params);
+ if (ret)
+ goto fail;
+ ret = nouveau_bo_map(dec->vp_params, NOUVEAU_BO_WR, dec->client);
+ if (ret)
+ goto fail;
+ }
+ if (is_mpeg12) {
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART,
+ 0,
+ align(0x20 * mb(templ->width) * mb(templ->height), 0x100) +
+ (6 * 64 * 8) * mb(templ->width) * mb(templ->height) + 0x100,
+ NULL, &dec->mpeg12_bo);
+ if (ret)
+ goto fail;
+ ret = nouveau_bo_map(dec->mpeg12_bo, NOUVEAU_BO_WR, dec->client);
+ if (ret)
+ goto fail;
+ }
+
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
+ 0, 0x1000, NULL, &dec->fence);
+ if (ret)
+ goto fail;
+ ret = nouveau_bo_map(dec->fence, NOUVEAU_BO_WR, dec->client);
+ if (ret)
+ goto fail;
+ *(uint32_t *)dec->fence->map = 0;
+
+ if (is_h264) {
+ nouveau_pushbuf_bufctx(bsp_push, dec->bsp_bufctx);
+ nouveau_bufctx_refn(dec->bsp_bufctx, 0,
+ dec->bsp_fw, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ nouveau_bufctx_refn(dec->bsp_bufctx, 0,
+ dec->bsp_data, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ }
+
+ nouveau_pushbuf_bufctx(vp_push, dec->vp_bufctx);
+ nouveau_bufctx_refn(dec->vp_bufctx, 0, dec->vp_fw,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ nouveau_bufctx_refn(dec->vp_bufctx, 0, dec->vp_data,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+
+ if (is_h264 && !ret)
+ ret = nouveau_object_new(dec->bsp_channel, 0xbeef74b0, 0x74b0,
+ NULL, 0, &dec->bsp);
+
+ if (!ret)
+ ret = nouveau_object_new(dec->vp_channel, 0xbeef7476, 0x7476,
+ NULL, 0, &dec->vp);
+
+ if (ret)
+ goto fail;
+
+
+ if (is_h264) {
+ /* Zero out some parts of mbring/vpring. there's gotta be some cleaner way
+ * of doing this... perhaps makes sense to just copy the relevant logic
+ * here. */
+ color.f[0] = color.f[1] = color.f[2] = color.f[3] = 0;
+ surf.offset = dec->frame_size;
+ surf.width = 64;
+ surf.height = (templ->max_references + 1) * dec->frame_mbs / 4;
+ surf.depth = 1;
+ surf.base.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ surf.base.u.tex.level = 0;
+ surf.base.texture = &mip.base.base;
+ mip.level[0].tile_mode = 0;
+ mip.level[0].pitch = surf.width * 4;
+ mip.base.domain = NOUVEAU_BO_VRAM;
+ mip.base.bo = dec->mbring;
+ context->clear_render_target(context, &surf.base, &color, 0, 0, 64, 4760);
+ surf.offset = dec->vpring->size / 2 - 0x1000;
+ surf.width = 1024;
+ surf.height = 1;
+ mip.level[0].pitch = surf.width * 4;
+ mip.base.bo = dec->vpring;
+ context->clear_render_target(context, &surf.base, &color, 0, 0, 1024, 1);
+ surf.offset = dec->vpring->size - 0x1000;
+ context->clear_render_target(context, &surf.base, &color, 0, 0, 1024, 1);
+
+ PUSH_SPACE(screen->pushbuf, 5);
+ PUSH_REFN(screen->pushbuf, dec->fence, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+ /* The clear_render_target is done via 3D engine, so use it to write to a
+ * sempahore to indicate that it's done.
+ */
+ BEGIN_NV04(screen->pushbuf, NV50_3D(QUERY_ADDRESS_HIGH), 4);
+ PUSH_DATAh(screen->pushbuf, dec->fence->offset);
+ PUSH_DATA (screen->pushbuf, dec->fence->offset);
+ PUSH_DATA (screen->pushbuf, 1);
+ PUSH_DATA (screen->pushbuf, 0xf010);
+ PUSH_KICK (screen->pushbuf);
+
+ PUSH_SPACE(bsp_push, 2 + 12 + 2 + 4 + 3);
+
+ BEGIN_NV04(bsp_push, SUBC_BSP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (bsp_push, dec->bsp->handle);
+
+ BEGIN_NV04(bsp_push, SUBC_BSP(0x180), 11);
+ for (i = 0; i < 11; i++)
+ PUSH_DATA(bsp_push, nv04_data.vram);
+ BEGIN_NV04(bsp_push, SUBC_BSP(0x1b8), 1);
+ PUSH_DATA (bsp_push, nv04_data.vram);
+
+ BEGIN_NV04(bsp_push, SUBC_BSP(0x600), 3);
+ PUSH_DATAh(bsp_push, dec->bsp_fw->offset);
+ PUSH_DATA (bsp_push, dec->bsp_fw->offset);
+ PUSH_DATA (bsp_push, dec->bsp_fw->size);
+
+ BEGIN_NV04(bsp_push, SUBC_BSP(0x628), 2);
+ PUSH_DATA (bsp_push, dec->bsp_data->offset >> 8);
+ PUSH_DATA (bsp_push, dec->bsp_data->size);
+ PUSH_KICK (bsp_push);
+ }
+
+ PUSH_SPACE(vp_push, 2 + 12 + 2 + 4 + 3);
+
+ BEGIN_NV04(vp_push, SUBC_VP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (vp_push, dec->vp->handle);
+
+ BEGIN_NV04(vp_push, SUBC_VP(0x180), 11);
+ for (i = 0; i < 11; i++)
+ PUSH_DATA(vp_push, nv04_data.vram);
+
+ BEGIN_NV04(vp_push, SUBC_VP(0x1b8), 1);
+ PUSH_DATA (vp_push, nv04_data.vram);
+
+ BEGIN_NV04(vp_push, SUBC_VP(0x600), 3);
+ PUSH_DATAh(vp_push, dec->vp_fw->offset);
+ PUSH_DATA (vp_push, dec->vp_fw->offset);
+ PUSH_DATA (vp_push, dec->vp_fw->size);
+
+ BEGIN_NV04(vp_push, SUBC_VP(0x628), 2);
+ PUSH_DATA (vp_push, dec->vp_data->offset >> 8);
+ PUSH_DATA (vp_push, dec->vp_data->size);
+ PUSH_KICK (vp_push);
+
+ return &dec->base;
+fail:
+ nv84_decoder_destroy(&dec->base);
+ return NULL;
+}
+
+static struct pipe_sampler_view **
+nv84_video_buffer_sampler_view_planes(struct pipe_video_buffer *buffer)
+{
+ struct nv84_video_buffer *buf = (struct nv84_video_buffer *)buffer;
+ return buf->sampler_view_planes;
+}
+
+static struct pipe_sampler_view **
+nv84_video_buffer_sampler_view_components(struct pipe_video_buffer *buffer)
+{
+ struct nv84_video_buffer *buf = (struct nv84_video_buffer *)buffer;
+ return buf->sampler_view_components;
+}
+
+static struct pipe_surface **
+nv84_video_buffer_surfaces(struct pipe_video_buffer *buffer)
+{
+ struct nv84_video_buffer *buf = (struct nv84_video_buffer *)buffer;
+ return buf->surfaces;
+}
+
+static void
+nv84_video_buffer_destroy(struct pipe_video_buffer *buffer)
+{
+ struct nv84_video_buffer *buf = (struct nv84_video_buffer *)buffer;
+ unsigned i;
+
+ assert(buf);
+
+ for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
+ pipe_resource_reference(&buf->resources[i], NULL);
+ pipe_sampler_view_reference(&buf->sampler_view_planes[i], NULL);
+ pipe_sampler_view_reference(&buf->sampler_view_components[i], NULL);
+ pipe_surface_reference(&buf->surfaces[i * 2], NULL);
+ pipe_surface_reference(&buf->surfaces[i * 2 + 1], NULL);
+ }
+
+ nouveau_bo_ref(NULL, &buf->interlaced);
+ nouveau_bo_ref(NULL, &buf->full);
+
+ FREE(buffer);
+}
+
+struct pipe_video_buffer *
+nv84_video_buffer_create(struct pipe_context *pipe,
+ const struct pipe_video_buffer *template)
+{
+ struct nv84_video_buffer *buffer;
+ struct pipe_resource templ;
+ unsigned i, j, component;
+ struct pipe_sampler_view sv_templ;
+ struct pipe_surface surf_templ;
+ struct nv50_miptree *mt0, *mt1;
+ struct nouveau_bo *empty = NULL;
+ struct nouveau_screen *screen = &((struct nv50_context *)pipe)->screen->base;
+ union nouveau_bo_config cfg;
+ unsigned bo_size;
+
+ if (getenv("XVMC_VL") || template->buffer_format != PIPE_FORMAT_NV12)
+ return vl_video_buffer_create(pipe, template);
+
+ if (!template->interlaced) {
+ debug_printf("Require interlaced video buffers\n");
+ return NULL;
+ }
+ if (template->chroma_format != PIPE_VIDEO_CHROMA_FORMAT_420) {
+ debug_printf("Must use 4:2:0 format\n");
+ return NULL;
+ }
+
+ /*
+ * Note that there are always going to be exactly two planes, one for Y,
+ * and one for UV. These are also the resources. VP expects these to be
+ * adjacent, so they need to belong to the same BO.
+ */
+
+ buffer = CALLOC_STRUCT(nv84_video_buffer);
+ if (!buffer) return NULL;
+
+ buffer->mvidx = -1;
+
+ buffer->base.buffer_format = template->buffer_format;
+ buffer->base.context = pipe;
+ buffer->base.destroy = nv84_video_buffer_destroy;
+ buffer->base.chroma_format = template->chroma_format;
+ buffer->base.width = template->width;
+ buffer->base.height = template->height;
+ buffer->base.get_sampler_view_planes = nv84_video_buffer_sampler_view_planes;
+ buffer->base.get_sampler_view_components = nv84_video_buffer_sampler_view_components;
+ buffer->base.get_surfaces = nv84_video_buffer_surfaces;
+ buffer->base.interlaced = true;
+
+ memset(&templ, 0, sizeof(templ));
+ templ.target = PIPE_TEXTURE_2D_ARRAY;
+ templ.depth0 = 1;
+ templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+ templ.format = PIPE_FORMAT_R8_UNORM;
+ templ.width0 = align(template->width, 2);
+ templ.height0 = align(template->height, 4) / 2;
+ templ.flags = NV50_RESOURCE_FLAG_VIDEO | NV50_RESOURCE_FLAG_NOALLOC;
+ templ.array_size = 2;
+
+ cfg.nv50.tile_mode = 0x20;
+ cfg.nv50.memtype = 0x70;
+
+ buffer->resources[0] = pipe->screen->resource_create(pipe->screen, &templ);
+ if (!buffer->resources[0])
+ goto error;
+
+ templ.format = PIPE_FORMAT_R8G8_UNORM;
+ templ.width0 /= 2;
+ templ.height0 /= 2;
+ buffer->resources[1] = pipe->screen->resource_create(pipe->screen, &templ);
+ if (!buffer->resources[1])
+ goto error;
+
+ mt0 = nv50_miptree(buffer->resources[0]);
+ mt1 = nv50_miptree(buffer->resources[1]);
+
+ bo_size = mt0->total_size + mt1->total_size;
+ if (nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, 0,
+ bo_size, &cfg, &buffer->interlaced))
+ goto error;
+ /* XXX Change reference frame management so that this is only allocated in
+ * the decoder when necessary. */
+ if (nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM | NOUVEAU_BO_NOSNOOP, 0,
+ bo_size, &cfg, &buffer->full))
+ goto error;
+
+ mt0->base.bo = buffer->interlaced;
+ mt0->base.domain = NOUVEAU_BO_VRAM;
+ mt0->base.offset = 0;
+ mt0->base.address = buffer->interlaced->offset + mt0->base.offset;
+ nouveau_bo_ref(buffer->interlaced, &empty);
+
+ mt1->base.bo = buffer->interlaced;
+ mt1->base.domain = NOUVEAU_BO_VRAM;
+ mt1->base.offset = mt0->layer_stride * 2;
+ mt1->base.address = buffer->interlaced->offset + mt1->base.offset;
+ nouveau_bo_ref(buffer->interlaced, &empty);
+
+ memset(&sv_templ, 0, sizeof(sv_templ));
+ for (component = 0, i = 0; i < 2; ++i ) {
+ struct pipe_resource *res = buffer->resources[i];
+ unsigned nr_components = util_format_get_nr_components(res->format);
+
+ u_sampler_view_default_template(&sv_templ, res, res->format);
+ buffer->sampler_view_planes[i] =
+ pipe->create_sampler_view(pipe, res, &sv_templ);
+ if (!buffer->sampler_view_planes[i])
+ goto error;
+
+ for (j = 0; j < nr_components; ++j, ++component) {
+ sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b =
+ PIPE_SWIZZLE_RED + j;
+ sv_templ.swizzle_a = PIPE_SWIZZLE_ONE;
+
+ buffer->sampler_view_components[component] =
+ pipe->create_sampler_view(pipe, res, &sv_templ);
+ if (!buffer->sampler_view_components[component])
+ goto error;
+ }
+ }
+
+ memset(&surf_templ, 0, sizeof(surf_templ));
+ for (j = 0; j < 2; ++j) {
+ surf_templ.format = buffer->resources[j]->format;
+ surf_templ.u.tex.first_layer = surf_templ.u.tex.last_layer = 0;
+ buffer->surfaces[j * 2] =
+ pipe->create_surface(pipe, buffer->resources[j], &surf_templ);
+ if (!buffer->surfaces[j * 2])
+ goto error;
+
+ surf_templ.u.tex.first_layer = surf_templ.u.tex.last_layer = 1;
+ buffer->surfaces[j * 2 + 1] =
+ pipe->create_surface(pipe, buffer->resources[j], &surf_templ);
+ if (!buffer->surfaces[j * 2 + 1])
+ goto error;
+ }
+
+ return &buffer->base;
+
+error:
+ nv84_video_buffer_destroy(&buffer->base);
+ return NULL;
+}
+
+int
+nv84_screen_get_video_param(struct pipe_screen *pscreen,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint,
+ enum pipe_video_cap param)
+{
+ switch (param) {
+ case PIPE_VIDEO_CAP_SUPPORTED:
+ return u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC ||
+ u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG12;
+ case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_VIDEO_CAP_MAX_WIDTH:
+ case PIPE_VIDEO_CAP_MAX_HEIGHT:
+ return 2048;
+ case PIPE_VIDEO_CAP_PREFERED_FORMAT:
+ return PIPE_FORMAT_NV12;
+ case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
+ case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
+ return true;
+ case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
+ return false;
+ case PIPE_VIDEO_CAP_MAX_LEVEL:
+ switch (profile) {
+ case PIPE_VIDEO_PROFILE_MPEG1:
+ return 0;
+ case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
+ case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
+ return 3;
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
+ case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
+ return 41;
+ default:
+ debug_printf("unknown video profile: %d\n", profile);
+ return 0;
+ }
+ default:
+ debug_printf("unknown video param: %d\n", param);
+ return 0;
+ }
+}
+
+boolean
+nv84_screen_video_supported(struct pipe_screen *screen,
+ enum pipe_format format,
+ enum pipe_video_profile profile,
+ enum pipe_video_entrypoint entrypoint)
+{
+ if (profile != PIPE_VIDEO_PROFILE_UNKNOWN)
+ return format == PIPE_FORMAT_NV12;
+
+ return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint);
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video.h b/src/gallium/drivers/nouveau/nv50/nv84_video.h
new file mode 100644
index 00000000000..2edba389dbf
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv84_video.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2013 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NV84_VIDEO_H_
+#define NV84_VIDEO_H_
+
+#include "vl/vl_decoder.h"
+#include "vl/vl_video_buffer.h"
+#include "vl/vl_types.h"
+
+#include "vl/vl_mpeg12_bitstream.h"
+
+#include "util/u_video.h"
+
+#include "nv50/nv50_context.h"
+
+/* These are expected to be on their own pushbufs */
+#define SUBC_BSP(m) 2, (m)
+#define SUBC_VP(m) 2, (m)
+
+union pipe_desc {
+ struct pipe_picture_desc *base;
+ struct pipe_mpeg12_picture_desc *mpeg12;
+ struct pipe_mpeg4_picture_desc *mpeg4;
+ struct pipe_vc1_picture_desc *vc1;
+ struct pipe_h264_picture_desc *h264;
+};
+
+struct nv84_video_buffer {
+ struct pipe_video_buffer base;
+ struct pipe_resource *resources[VL_NUM_COMPONENTS];
+ struct pipe_sampler_view *sampler_view_planes[VL_NUM_COMPONENTS];
+ struct pipe_sampler_view *sampler_view_components[VL_NUM_COMPONENTS];
+ struct pipe_surface *surfaces[VL_NUM_COMPONENTS * 2];
+
+ struct nouveau_bo *interlaced, *full;
+ int mvidx;
+ unsigned frame_num, frame_num_max;
+};
+
+struct nv84_decoder {
+ struct pipe_video_codec base;
+ struct nouveau_client *client;
+ struct nouveau_object *bsp_channel, *vp_channel, *bsp, *vp;
+ struct nouveau_pushbuf *bsp_pushbuf, *vp_pushbuf;
+ struct nouveau_bufctx *bsp_bufctx, *vp_bufctx;
+
+ struct nouveau_bo *bsp_fw, *bsp_data;
+ struct nouveau_bo *vp_fw, *vp_data;
+ struct nouveau_bo *mbring, *vpring;
+
+ /*
+ * states:
+ * 0: init
+ * 1: vpring/mbring cleared, bsp is ready
+ * 2: bsp is done, vp is ready
+ * and then vp it back to 1
+ */
+ struct nouveau_bo *fence;
+
+ struct nouveau_bo *bitstream;
+ struct nouveau_bo *vp_params;
+
+ size_t vp_fw2_offset;
+
+ unsigned frame_mbs, frame_size;
+ /* VPRING layout:
+ RESIDUAL
+ CTRL
+ DEBLOCK
+ 0x1000
+ */
+ unsigned vpring_deblock, vpring_residual, vpring_ctrl;
+
+
+ struct vl_mpg12_bs *mpeg12_bs;
+
+ struct nouveau_bo *mpeg12_bo;
+ void *mpeg12_mb_info;
+ uint16_t *mpeg12_data;
+ const int *zscan;
+ uint8_t mpeg12_intra_matrix[64];
+ uint8_t mpeg12_non_intra_matrix[64];
+};
+
+static INLINE uint32_t mb(uint32_t coord)
+{
+ return (coord + 0xf)>>4;
+}
+
+static INLINE uint32_t mb_half(uint32_t coord)
+{
+ return (coord + 0x1f)>>5;
+}
+
+int
+nv84_decoder_bsp(struct nv84_decoder *dec,
+ struct pipe_h264_picture_desc *desc,
+ unsigned num_buffers,
+ const void *const *data,
+ const unsigned *num_bytes,
+ struct nv84_video_buffer *dest);
+
+void
+nv84_decoder_vp_h264(struct nv84_decoder *dec,
+ struct pipe_h264_picture_desc *desc,
+ struct nv84_video_buffer *dest);
+
+void
+nv84_decoder_vp_mpeg12_mb(struct nv84_decoder *dec,
+ struct pipe_mpeg12_picture_desc *desc,
+ const struct pipe_mpeg12_macroblock *mb);
+
+void
+nv84_decoder_vp_mpeg12(struct nv84_decoder *dec,
+ struct pipe_mpeg12_picture_desc *desc,
+ struct nv84_video_buffer *dest);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c b/src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c
new file mode 100644
index 00000000000..86047b5f463
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv84_video_bsp.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright 2013 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv84_video.h"
+
+struct iparm {
+ struct iseqparm {
+ uint32_t chroma_format_idc; // 00
+ uint32_t pad[(0x128 - 0x4) / 4];
+ uint32_t log2_max_frame_num_minus4; // 128
+ uint32_t pic_order_cnt_type; // 12c
+ uint32_t log2_max_pic_order_cnt_lsb_minus4; // 130
+ uint32_t delta_pic_order_always_zero_flag; // 134
+ uint32_t num_ref_frames; // 138
+ uint32_t pic_width_in_mbs_minus1; // 13c
+ uint32_t pic_height_in_map_units_minus1; // 140
+ uint32_t frame_mbs_only_flag; // 144
+ uint32_t mb_adaptive_frame_field_flag; // 148
+ uint32_t direct_8x8_inference_flag; // 14c
+ } iseqparm; // 000
+ struct ipicparm {
+ uint32_t entropy_coding_mode_flag; // 00
+ uint32_t pic_order_present_flag; // 04
+ uint32_t num_slice_groups_minus1; // 08
+ uint32_t slice_group_map_type; // 0c
+ uint32_t pad1[0x60 / 4];
+ uint32_t u70; // 70
+ uint32_t u74; // 74
+ uint32_t u78; // 78
+ uint32_t num_ref_idx_l0_active_minus1; // 7c
+ uint32_t num_ref_idx_l1_active_minus1; // 80
+ uint32_t weighted_pred_flag; // 84
+ uint32_t weighted_bipred_idc; // 88
+ uint32_t pic_init_qp_minus26; // 8c
+ uint32_t chroma_qp_index_offset; // 90
+ uint32_t deblocking_filter_control_present_flag; // 94
+ uint32_t constrained_intra_pred_flag; // 98
+ uint32_t redundant_pic_cnt_present_flag; // 9c
+ uint32_t transform_8x8_mode_flag; // a0
+ uint32_t pad2[(0x1c8 - 0xa0 - 4) / 4];
+ uint32_t second_chroma_qp_index_offset; // 1c8
+ uint32_t u1cc; // 1cc
+ uint32_t curr_pic_order_cnt; // 1d0
+ uint32_t field_order_cnt[2]; // 1d4
+ uint32_t curr_mvidx; // 1dc
+ struct iref {
+ uint32_t u00; // 00
+ uint32_t field_is_ref; // 04 // bit0: top, bit1: bottom
+ uint8_t is_long_term; // 08
+ uint8_t non_existing; // 09
+ uint32_t frame_idx; // 0c
+ uint32_t field_order_cnt[2]; // 10
+ uint32_t mvidx; // 18
+ uint8_t field_pic_flag; // 1c
+ // 20
+ } refs[0x10]; // 1e0
+ } ipicparm; // 150
+};
+
+int
+nv84_decoder_bsp(struct nv84_decoder *dec,
+ struct pipe_h264_picture_desc *desc,
+ unsigned num_buffers,
+ const void *const *data,
+ const unsigned *num_bytes,
+ struct nv84_video_buffer *dest)
+{
+ struct iparm params;
+ uint32_t more_params[0x44 / 4] = {0};
+ unsigned total_bytes = 0;
+ int i;
+ static const uint32_t end[] = {0x0b010000, 0, 0x0b010000, 0};
+ char indexes[17] = {0};
+ struct nouveau_pushbuf *push = dec->bsp_pushbuf;
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { dec->vpring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { dec->mbring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { dec->bitstream, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART },
+ { dec->fence, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ };
+
+ nouveau_bo_wait(dec->fence, NOUVEAU_BO_RDWR, dec->client);
+
+ STATIC_ASSERT(sizeof(struct iparm) == 0x530);
+
+ memset(&params, 0, sizeof(params));
+
+ dest->frame_num = dest->frame_num_max = desc->frame_num;
+
+ for (i = 0; i < 16; i++) {
+ struct iref *ref = &params.ipicparm.refs[i];
+ struct nv84_video_buffer *frame = (struct nv84_video_buffer *)desc->ref[i];
+ if (!frame) break;
+ /* The frame index is relative to the last IDR frame. So once the frame
+ * num goes back to 0, previous reference frames need to have a negative
+ * index.
+ */
+ if (desc->frame_num >= frame->frame_num_max) {
+ frame->frame_num_max = desc->frame_num;
+ } else {
+ frame->frame_num -= frame->frame_num_max + 1;
+ frame->frame_num_max = desc->frame_num;
+ }
+ ref->non_existing = 0;
+ ref->field_is_ref = (desc->top_is_reference[i] ? 1 : 0) |
+ (desc->bottom_is_reference[i] ? 2 : 0);
+ ref->is_long_term = desc->is_long_term[i];
+ ref->field_order_cnt[0] = desc->field_order_cnt_list[i][0];
+ ref->field_order_cnt[1] = desc->field_order_cnt_list[i][1];
+ ref->frame_idx = frame->frame_num;
+ ref->u00 = ref->mvidx = frame->mvidx;
+ ref->field_pic_flag = desc->field_pic_flag;
+ indexes[frame->mvidx] = 1;
+ }
+
+ /* Needs to be adjusted if we ever support non-4:2:0 videos */
+ params.iseqparm.chroma_format_idc = 1;
+
+ params.iseqparm.pic_width_in_mbs_minus1 = mb(dec->base.width) - 1;
+ if (desc->field_pic_flag || desc->mb_adaptive_frame_field_flag)
+ params.iseqparm.pic_height_in_map_units_minus1 = mb_half(dec->base.height) - 1;
+ else
+ params.iseqparm.pic_height_in_map_units_minus1 = mb(dec->base.height) - 1;
+
+ if (desc->bottom_field_flag)
+ params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[1];
+ else
+ params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[0];
+ params.ipicparm.field_order_cnt[0] = desc->field_order_cnt[0];
+ params.ipicparm.field_order_cnt[1] = desc->field_order_cnt[1];
+ if (desc->is_reference) {
+ if (dest->mvidx < 0) {
+ for (i = 0; i < desc->num_ref_frames + 1; i++) {
+ if (!indexes[i]) {
+ dest->mvidx = i;
+ break;
+ }
+ }
+ assert(i != desc->num_ref_frames + 1);
+ }
+
+ params.ipicparm.u1cc = params.ipicparm.curr_mvidx = dest->mvidx;
+ }
+
+ params.iseqparm.num_ref_frames = desc->num_ref_frames;
+ params.iseqparm.mb_adaptive_frame_field_flag = desc->mb_adaptive_frame_field_flag;
+ params.ipicparm.constrained_intra_pred_flag = desc->constrained_intra_pred_flag;
+ params.ipicparm.weighted_pred_flag = desc->weighted_pred_flag;
+ params.ipicparm.weighted_bipred_idc = desc->weighted_bipred_idc;
+ params.iseqparm.frame_mbs_only_flag = desc->frame_mbs_only_flag;
+ params.ipicparm.transform_8x8_mode_flag = desc->transform_8x8_mode_flag;
+ params.ipicparm.chroma_qp_index_offset = desc->chroma_qp_index_offset;
+ params.ipicparm.second_chroma_qp_index_offset = desc->second_chroma_qp_index_offset;
+ params.ipicparm.pic_init_qp_minus26 = desc->pic_init_qp_minus26;
+ params.ipicparm.num_ref_idx_l0_active_minus1 = desc->num_ref_idx_l0_active_minus1;
+ params.ipicparm.num_ref_idx_l1_active_minus1 = desc->num_ref_idx_l1_active_minus1;
+ params.iseqparm.log2_max_frame_num_minus4 = desc->log2_max_frame_num_minus4;
+ params.iseqparm.pic_order_cnt_type = desc->pic_order_cnt_type;
+ params.iseqparm.log2_max_pic_order_cnt_lsb_minus4 = desc->log2_max_pic_order_cnt_lsb_minus4;
+ params.iseqparm.delta_pic_order_always_zero_flag = desc->delta_pic_order_always_zero_flag;
+ params.iseqparm.direct_8x8_inference_flag = desc->direct_8x8_inference_flag;
+ params.ipicparm.entropy_coding_mode_flag = desc->entropy_coding_mode_flag;
+ params.ipicparm.pic_order_present_flag = desc->pic_order_present_flag;
+ params.ipicparm.deblocking_filter_control_present_flag = desc->deblocking_filter_control_present_flag;
+ params.ipicparm.redundant_pic_cnt_present_flag = desc->redundant_pic_cnt_present_flag;
+
+ memcpy(dec->bitstream->map, &params, sizeof(params));
+ for (i = 0; i < num_buffers; i++) {
+ assert(total_bytes + num_bytes[i] < dec->bitstream->size / 2 - 0x700);
+ memcpy(dec->bitstream->map + 0x700 + total_bytes, data[i], num_bytes[i]);
+ total_bytes += num_bytes[i];
+ }
+ memcpy(dec->bitstream->map + 0x700 + total_bytes, end, sizeof(end));
+ total_bytes += sizeof(end);
+ more_params[1] = total_bytes;
+ memcpy(dec->bitstream->map + 0x600, more_params, sizeof(more_params));
+
+ PUSH_SPACE(push, 5 + 21 + 3 + 2 + 4 + 2);
+ nouveau_pushbuf_refn(push, bo_refs, sizeof(bo_refs)/sizeof(bo_refs[0]));
+
+ /* Wait for the fence = 1 */
+ BEGIN_NV04(push, SUBC_BSP(0x10), 4);
+ PUSH_DATAh(push, dec->fence->offset);
+ PUSH_DATA (push, dec->fence->offset);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 1);
+
+ /* TODO: Use both halves of bitstream/vpring for alternating frames */
+
+ /* Kick off the BSP */
+ BEGIN_NV04(push, SUBC_BSP(0x400), 20);
+ PUSH_DATA (push, dec->bitstream->offset >> 8);
+ PUSH_DATA (push, (dec->bitstream->offset >> 8) + 7);
+ PUSH_DATA (push, dec->bitstream->size / 2 - 0x700);
+ PUSH_DATA (push, (dec->bitstream->offset >> 8) + 6);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, dec->mbring->offset >> 8);
+ PUSH_DATA (push, dec->frame_size);
+ PUSH_DATA (push, (dec->mbring->offset + dec->frame_size) >> 8);
+ PUSH_DATA (push, dec->vpring->offset >> 8);
+ PUSH_DATA (push, dec->vpring->size / 2);
+ PUSH_DATA (push, dec->vpring_residual);
+ PUSH_DATA (push, dec->vpring_ctrl);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, dec->vpring_residual);
+ PUSH_DATA (push, dec->vpring_residual + dec->vpring_ctrl);
+ PUSH_DATA (push, dec->vpring_deblock);
+ PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl +
+ dec->vpring_residual + dec->vpring_deblock) >> 8);
+ PUSH_DATA (push, 0x654321);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0x100008);
+
+ BEGIN_NV04(push, SUBC_BSP(0x620), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NV04(push, SUBC_BSP(0x300), 1);
+ PUSH_DATA (push, 0);
+
+ /* Write fence = 2, intr */
+ BEGIN_NV04(push, SUBC_BSP(0x610), 3);
+ PUSH_DATAh(push, dec->fence->offset);
+ PUSH_DATA (push, dec->fence->offset);
+ PUSH_DATA (push, 2);
+
+ BEGIN_NV04(push, SUBC_BSP(0x304), 1);
+ PUSH_DATA (push, 0x101);
+ PUSH_KICK (push);
+ return 0;
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c b/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c
new file mode 100644
index 00000000000..619aa4e7a40
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c
@@ -0,0 +1,552 @@
+/*
+ * Copyright 2013 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv84_video.h"
+
+#include "util/u_sse.h"
+
+struct h264_iparm1 {
+ uint8_t scaling_lists_4x4[6][16]; // 00
+ uint8_t scaling_lists_8x8[2][64]; // 60
+ uint32_t width; // e0
+ uint32_t height; // e4
+ uint64_t ref1_addrs[16]; // e8
+ uint64_t ref2_addrs[16]; // 168
+ uint32_t unk1e8;
+ uint32_t unk1ec;
+ uint32_t w1; // 1f0
+ uint32_t w2; // 1f4
+ uint32_t w3; // 1f8
+ uint32_t h1; // 1fc
+ uint32_t h2; // 200
+ uint32_t h3; // 204
+ uint32_t mb_adaptive_frame_field_flag; // 208
+ uint32_t field_pic_flag; // 20c
+ uint32_t format; // 210
+ uint32_t unk214; // 214
+};
+
+struct h264_iparm2 {
+ uint32_t width; // 00
+ uint32_t height; // 04
+ uint32_t mbs; // 08
+ uint32_t w1; // 0c
+ uint32_t w2; // 10
+ uint32_t w3; // 14
+ uint32_t h1; // 18
+ uint32_t h2; // 1c
+ uint32_t h3; // 20
+ uint32_t unk24;
+ uint32_t mb_adaptive_frame_field_flag; // 28
+ uint32_t top; // 2c
+ uint32_t bottom; // 30
+ uint32_t is_reference; // 34
+};
+
+void
+nv84_decoder_vp_h264(struct nv84_decoder *dec,
+ struct pipe_h264_picture_desc *desc,
+ struct nv84_video_buffer *dest)
+{
+ struct h264_iparm1 param1;
+ struct h264_iparm2 param2;
+ int i, width = align(dest->base.width, 16),
+ height = align(dest->base.height, 16);
+
+ struct nouveau_pushbuf *push = dec->vp_pushbuf;
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { dest->interlaced, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { dest->full, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { dec->vpring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { dec->mbring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { dec->vp_params, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART },
+ { dec->fence, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ };
+ int num_refs = sizeof(bo_refs)/sizeof(*bo_refs);
+ bool is_ref = desc->is_reference;
+
+ STATIC_ASSERT(sizeof(struct h264_iparm1) == 0x218);
+ STATIC_ASSERT(sizeof(struct h264_iparm2) == 0x38);
+
+ memset(&param1, 0, sizeof(param1));
+ memset(&param2, 0, sizeof(param2));
+
+ memcpy(&param1.scaling_lists_4x4, desc->scaling_lists_4x4,
+ sizeof(param1.scaling_lists_4x4));
+ memcpy(&param1.scaling_lists_8x8, desc->scaling_lists_8x8,
+ sizeof(param1.scaling_lists_8x8));
+
+ param1.width = width;
+ param1.w1 = param1.w2 = param1.w3 = align(width, 64);
+ param1.height = param1.h2 = height;
+ param1.h1 = param1.h3 = align(height, 32);
+ param1.format = 0x3231564e; /* 'NV12' */
+ param1.mb_adaptive_frame_field_flag = desc->mb_adaptive_frame_field_flag;
+ param1.field_pic_flag = desc->field_pic_flag;
+
+ param2.width = width;
+ param2.w1 = param2.w2 = param2.w3 = param1.w1;
+ if (desc->field_pic_flag)
+ param2.height = align(height, 32) / 2;
+ else
+ param2.height = height;
+ param2.h1 = param2.h2 = align(height, 32);
+ param2.h3 = height;
+ param2.mbs = width * height >> 8;
+ if (desc->field_pic_flag) {
+ param2.top = desc->bottom_field_flag ? 2 : 1;
+ param2.bottom = desc->bottom_field_flag;
+ }
+ param2.mb_adaptive_frame_field_flag = desc->mb_adaptive_frame_field_flag;
+ param2.is_reference = desc->is_reference;
+
+ PUSH_SPACE(push, 5 + 16 + 3 + 2 + 6 + (is_ref ? 2 : 0) + 3 + 2 + 4 + 2);
+
+ struct nouveau_bo *ref2_default = dest->full;
+
+ for (i = 0; i < 16; i++) {
+ struct nv84_video_buffer *buf = (struct nv84_video_buffer *)desc->ref[i];
+ struct nouveau_bo *bo1, *bo2;
+ if (buf) {
+ bo1 = buf->interlaced;
+ bo2 = buf->full;
+ if (i == 0)
+ ref2_default = buf->full;
+ } else {
+ bo1 = dest->interlaced;
+ bo2 = ref2_default;
+ }
+ param1.ref1_addrs[i] = bo1->offset;
+ param1.ref2_addrs[i] = bo2->offset;
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { bo1, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { bo2, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ };
+ nouveau_pushbuf_refn(push, bo_refs, sizeof(bo_refs)/sizeof(bo_refs[0]));
+ }
+
+ memcpy(dec->vp_params->map, &param1, sizeof(param1));
+ memcpy(dec->vp_params->map + 0x400, &param2, sizeof(param2));
+
+ nouveau_pushbuf_refn(push, bo_refs, num_refs);
+
+ /* Wait for BSP to have completed */
+ BEGIN_NV04(push, SUBC_VP(0x10), 4);
+ PUSH_DATAh(push, dec->fence->offset);
+ PUSH_DATA (push, dec->fence->offset);
+ PUSH_DATA (push, 2);
+ PUSH_DATA (push, 1); /* wait for sem == 2 */
+
+ /* VP step 1 */
+ BEGIN_NV04(push, SUBC_VP(0x400), 15);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, param2.mbs);
+ PUSH_DATA (push, 0x3987654); /* each nibble probably a dma index */
+ PUSH_DATA (push, 0x55001); /* constant */
+ PUSH_DATA (push, dec->vp_params->offset >> 8);
+ PUSH_DATA (push, (dec->vpring->offset + dec->vpring_residual) >> 8);
+ PUSH_DATA (push, dec->vpring_ctrl);
+ PUSH_DATA (push, dec->vpring->offset >> 8);
+ PUSH_DATA (push, dec->bitstream->size / 2 - 0x700);
+ PUSH_DATA (push, (dec->mbring->offset + dec->mbring->size - 0x2000) >> 8);
+ PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl +
+ dec->vpring_residual + dec->vpring_deblock) >> 8);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0x100008);
+ PUSH_DATA (push, dest->interlaced->offset >> 8);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NV04(push, SUBC_VP(0x620), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NV04(push, SUBC_VP(0x300), 1);
+ PUSH_DATA (push, 0);
+
+ /* VP step 2 */
+ BEGIN_NV04(push, SUBC_VP(0x400), 5);
+ PUSH_DATA (push, 0x54530201);
+ PUSH_DATA (push, (dec->vp_params->offset >> 8) + 0x4);
+ PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl +
+ dec->vpring_residual) >> 8);
+ PUSH_DATA (push, dest->interlaced->offset >> 8);
+ PUSH_DATA (push, dest->interlaced->offset >> 8);
+
+ if (is_ref) {
+ BEGIN_NV04(push, SUBC_VP(0x414), 1);
+ PUSH_DATA (push, dest->full->offset >> 8);
+ }
+
+ BEGIN_NV04(push, SUBC_VP(0x620), 2);
+ PUSH_DATAh(push, dec->vp_fw2_offset);
+ PUSH_DATA (push, dec->vp_fw2_offset);
+
+ BEGIN_NV04(push, SUBC_VP(0x300), 1);
+ PUSH_DATA (push, 0);
+
+ /* Set the semaphore back to 1 */
+ BEGIN_NV04(push, SUBC_VP(0x610), 3);
+ PUSH_DATAh(push, dec->fence->offset);
+ PUSH_DATA (push, dec->fence->offset);
+ PUSH_DATA (push, 1);
+
+ /* Write to the semaphore location, intr */
+ BEGIN_NV04(push, SUBC_VP(0x304), 1);
+ PUSH_DATA (push, 0x101);
+
+ for (i = 0; i < 2; i++) {
+ struct nv50_miptree *mt = nv50_miptree(dest->resources[i]);
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ }
+
+ PUSH_KICK (push);
+}
+
+static INLINE int16_t inverse_quantize(int16_t val, uint8_t quant, int mpeg1) {
+ int16_t ret = val * quant / 16;
+ if (mpeg1 && ret) {
+ if (ret > 0)
+ ret = (ret - 1) | 1;
+ else
+ ret = (ret + 1) | 1;
+ }
+ if (ret < -2048)
+ ret = -2048;
+ else if (ret > 2047)
+ ret = 2047;
+ return ret;
+}
+
+struct mpeg12_mb_info {
+ uint32_t index;
+ uint8_t unk4;
+ uint8_t unk5;
+ uint16_t coded_block_pattern;
+ uint8_t block_counts[6];
+ uint16_t PMV[8];
+ uint16_t skipped;
+};
+
+void
+nv84_decoder_vp_mpeg12_mb(struct nv84_decoder *dec,
+ struct pipe_mpeg12_picture_desc *desc,
+ const struct pipe_mpeg12_macroblock *macrob)
+{
+ STATIC_ASSERT(sizeof(struct mpeg12_mb_info) == 32);
+
+ struct mpeg12_mb_info info = {0};
+ int i, sum = 0, mask, block_index, count;
+ const int16_t *blocks;
+ int intra = macrob->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA;
+ int motion = macrob->macroblock_type &
+ (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD);
+ const uint8_t *quant_matrix = intra ? dec->mpeg12_intra_matrix :
+ dec->mpeg12_non_intra_matrix;
+ int mpeg1 = dec->base.profile == PIPE_VIDEO_PROFILE_MPEG1;
+
+ info.index = macrob->y * mb(dec->base.width) + macrob->x;
+ info.unk4 = motion;
+ if (intra)
+ info.unk4 |= 1;
+ if (macrob->macroblock_modes.bits.dct_type)
+ info.unk4 |= 0x20;
+ info.unk5 = (macrob->motion_vertical_field_select << 4) |
+ (macrob->macroblock_modes.value & 0xf);
+ info.coded_block_pattern = macrob->coded_block_pattern;
+ if (motion) {
+ memcpy(info.PMV, macrob->PMV, sizeof(info.PMV));
+ }
+ blocks = macrob->blocks;
+ for (mask = 0x20, block_index = 0; mask > 0; mask >>= 1, block_index++) {
+ if ((macrob->coded_block_pattern & mask) == 0)
+ continue;
+
+ count = 0;
+
+ /*
+ * The observation here is that there are a lot of 0's, and things go
+ * a lot faster if one skips over them.
+ */
+
+#if defined(PIPE_ARCH_SSE) && defined(PIPE_ARCH_X86_64)
+/* Note that the SSE implementation is much more tuned to X86_64. As it's not
+ * benchmarked on X86_32, disable it there. I suspect that the code needs to
+ * be reorganized in terms of 32-bit wide data in order to be more
+ * efficient. NV84+ were released well into the 64-bit CPU era, so it should
+ * be a minority case.
+ */
+
+/* This returns a 16-bit bit-mask, each 2 bits are both 1 or both 0, depending
+ * on whether the corresponding (16-bit) word in blocks is zero or non-zero. */
+#define wordmask(blocks, zero) \
+ (uint64_t)(_mm_movemask_epi8( \
+ _mm_cmpeq_epi16( \
+ zero, _mm_load_si128((__m128i *)(blocks)))))
+
+ __m128i zero = _mm_setzero_si128();
+
+ /* TODO: Look into doing the inverse quantization in terms of SSE
+ * operations unconditionally, when necessary. */
+ uint64_t bmask0 = wordmask(blocks, zero);
+ bmask0 |= wordmask(blocks + 8, zero) << 16;
+ bmask0 |= wordmask(blocks + 16, zero) << 32;
+ bmask0 |= wordmask(blocks + 24, zero) << 48;
+ uint64_t bmask1 = wordmask(blocks + 32, zero);
+ bmask1 |= wordmask(blocks + 40, zero) << 16;
+ bmask1 |= wordmask(blocks + 48, zero) << 32;
+ bmask1 |= wordmask(blocks + 56, zero) << 48;
+
+ /* The wordmask macro returns the inverse of what we want, since it
+ * returns a 1 for equal-to-zero. Invert. */
+ bmask0 = ~bmask0;
+ bmask1 = ~bmask1;
+
+ /* Note that the bitmask is actually sequences of 2 bits for each block
+ * index. This is because there is no movemask_epi16. That means that
+ * (a) ffs will never return 64, since the prev bit will always be set
+ * in that case, and (b) we need to do an extra bit shift. Or'ing the
+ * bitmasks together is faster than having a loop that computes them one
+ * at a time and processes them, on a Core i7-920. Trying to put bmask
+ * into an array and then looping also slows things down.
+ */
+
+ /* shift needs to be the same width as i, and unsigned so that / 2
+ * becomes a rshift operation */
+ uint32_t shift;
+ i = 0;
+
+ if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+ int16_t tmp;
+ while ((shift = __builtin_ffsll(bmask0))) {
+ i += (shift - 1) / 2;
+ bmask0 >>= shift - 1;
+ *dec->mpeg12_data++ = dec->zscan[i] * 2;
+ tmp = inverse_quantize(blocks[i], quant_matrix[i], mpeg1);
+ *dec->mpeg12_data++ = tmp;
+ sum += tmp;
+ count++;
+ i++;
+ bmask0 >>= 2;
+ }
+ i = 32;
+ while ((shift = __builtin_ffsll(bmask1))) {
+ i += (shift - 1) / 2;
+ bmask1 >>= shift - 1;
+ *dec->mpeg12_data++ = dec->zscan[i] * 2;
+ tmp = inverse_quantize(blocks[i], quant_matrix[i], mpeg1);
+ *dec->mpeg12_data++ = tmp;
+ sum += tmp;
+ count++;
+ i++;
+ bmask1 >>= 2;
+ }
+ } else {
+ while ((shift = __builtin_ffsll(bmask0))) {
+ i += (shift - 1) / 2;
+ bmask0 >>= shift - 1;
+ *dec->mpeg12_data++ = i * 2;
+ *dec->mpeg12_data++ = blocks[i];
+ count++;
+ i++;
+ bmask0 >>= 2;
+ }
+ i = 32;
+ while ((shift = __builtin_ffsll(bmask1))) {
+ i += (shift - 1) / 2;
+ bmask1 >>= shift - 1;
+ *dec->mpeg12_data++ = i * 2;
+ *dec->mpeg12_data++ = blocks[i];
+ count++;
+ i++;
+ bmask1 >>= 2;
+ }
+ }
+#undef wordmask
+#else
+
+ /*
+ * This loop looks ridiculously written... and it is. I tried a lot of
+ * different ways of achieving this scan, and this was the fastest, at
+ * least on a Core i7-920. Note that it's not necessary to skip the 0's,
+ * the firmware will deal with those just fine. But it's faster to skip
+ * them. Note to people trying benchmarks: make sure to use realistic
+ * mpeg data, which can often be a single data point first followed by
+ * 63 0's, or <data> 7x <0> <data> 7x <0> etc.
+ */
+ i = 0;
+ if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+ while (true) {
+ int16_t tmp;
+ while (likely(i < 64 && !(tmp = blocks[i]))) i++;
+ if (i >= 64) break;
+ *dec->mpeg12_data++ = dec->zscan[i] * 2;
+ tmp = inverse_quantize(tmp, quant_matrix[i], mpeg1);
+ *dec->mpeg12_data++ = tmp;
+ sum += tmp;
+ count++;
+ i++;
+ }
+ } else {
+ while (true) {
+ int16_t tmp;
+ while (likely(i < 64 && !(tmp = blocks[i]))) i++;
+ if (i >= 64) break;
+ *dec->mpeg12_data++ = i * 2;
+ *dec->mpeg12_data++ = tmp;
+ count++;
+ i++;
+ }
+ }
+
+#endif
+
+ if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+ if (!mpeg1 && (sum & 1) == 0) {
+ if (count && *(dec->mpeg12_data - 2) == 63 * 2) {
+ uint16_t *val = dec->mpeg12_data - 1;
+ if (*val & 1) *val -= 1;
+ else *val += 1;
+ } else {
+ *dec->mpeg12_data++ = 63 * 2;
+ *dec->mpeg12_data++ = 1;
+ count++;
+ }
+ }
+ }
+
+ if (count) {
+ *(dec->mpeg12_data - 2) |= 1;
+ } else {
+ *dec->mpeg12_data++ = 1;
+ *dec->mpeg12_data++ = 0;
+ count = 1;
+ }
+ info.block_counts[block_index] = count;
+ blocks += 64;
+ }
+
+ memcpy(dec->mpeg12_mb_info, &info, sizeof(info));
+ dec->mpeg12_mb_info += sizeof(info);
+
+ if (macrob->num_skipped_macroblocks) {
+ info.index++;
+ info.coded_block_pattern = 0;
+ info.skipped = macrob->num_skipped_macroblocks - 1;
+ memset(info.block_counts, 0, sizeof(info.block_counts));
+ memcpy(dec->mpeg12_mb_info, &info, sizeof(info));
+ dec->mpeg12_mb_info += sizeof(info);
+ }
+}
+
+struct mpeg12_header {
+ uint32_t luma_top_size; // 00
+ uint32_t luma_bottom_size; // 04
+ uint32_t chroma_top_size; // 08
+ uint32_t mbs; // 0c
+ uint32_t mb_info_size; // 10
+ uint32_t mb_width_minus1; // 14
+ uint32_t mb_height_minus1; // 18
+ uint32_t width; // 1c
+ uint32_t height; // 20
+ uint8_t progressive; // 24
+ uint8_t mocomp_only; // 25
+ uint8_t frames; // 26
+ uint8_t picture_structure; // 27
+ uint32_t unk28; // 28 -- 0x50100
+ uint32_t unk2c; // 2c
+ uint32_t pad[4 * 13];
+};
+
+void
+nv84_decoder_vp_mpeg12(struct nv84_decoder *dec,
+ struct pipe_mpeg12_picture_desc *desc,
+ struct nv84_video_buffer *dest)
+{
+ struct nouveau_pushbuf *push = dec->vp_pushbuf;
+ struct nv84_video_buffer *ref1 = (struct nv84_video_buffer *)desc->ref[0];
+ struct nv84_video_buffer *ref2 = (struct nv84_video_buffer *)desc->ref[1];
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { dest->interlaced, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { NULL, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { NULL, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ { dec->mpeg12_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART },
+ };
+ int i, num_refs = sizeof(bo_refs) / sizeof(*bo_refs);
+ struct mpeg12_header header = {0};
+ struct nv50_miptree *y = nv50_miptree(dest->resources[0]);
+ struct nv50_miptree *uv = nv50_miptree(dest->resources[1]);
+
+ STATIC_ASSERT(sizeof(struct mpeg12_header) == 0x100);
+
+ if (ref1 == NULL)
+ ref1 = dest;
+ if (ref2 == NULL)
+ ref2 = dest;
+ bo_refs[1].bo = ref1->interlaced;
+ bo_refs[2].bo = ref2->interlaced;
+
+ header.luma_top_size = y->layer_stride;
+ header.luma_bottom_size = y->layer_stride;
+ header.chroma_top_size = uv->layer_stride;
+ header.mbs = mb(dec->base.width) * mb(dec->base.height);
+ header.mb_info_size = dec->mpeg12_mb_info - dec->mpeg12_bo->map - 0x100;
+ header.mb_width_minus1 = mb(dec->base.width) - 1;
+ header.mb_height_minus1 = mb(dec->base.height) - 1;
+ header.width = align(dec->base.width, 16);
+ header.height = align(dec->base.height, 16);
+ header.progressive = desc->frame_pred_frame_dct;
+ header.frames = 1 + (desc->ref[0] != NULL) + (desc->ref[1] != NULL);
+ header.picture_structure = desc->picture_structure;
+ header.unk28 = 0x50100;
+
+ memcpy(dec->mpeg12_bo->map, &header, sizeof(header));
+
+ PUSH_SPACE(push, 10 + 3 + 2);
+
+ nouveau_pushbuf_refn(push, bo_refs, num_refs);
+
+ BEGIN_NV04(push, SUBC_VP(0x400), 9);
+ PUSH_DATA (push, 0x543210); /* each nibble possibly a dma index */
+ PUSH_DATA (push, 0x555001); /* constant */
+ PUSH_DATA (push, dec->mpeg12_bo->offset >> 8);
+ PUSH_DATA (push, (dec->mpeg12_bo->offset + 0x100) >> 8);
+ PUSH_DATA (push, (dec->mpeg12_bo->offset + 0x100 +
+ align(0x20 * mb(dec->base.width) *
+ mb(dec->base.height), 0x100)) >> 8);
+ PUSH_DATA (push, dest->interlaced->offset >> 8);
+ PUSH_DATA (push, ref1->interlaced->offset >> 8);
+ PUSH_DATA (push, ref2->interlaced->offset >> 8);
+ PUSH_DATA (push, 6 * 64 * 8 * header.mbs);
+
+ BEGIN_NV04(push, SUBC_VP(0x620), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NV04(push, SUBC_VP(0x300), 1);
+ PUSH_DATA (push, 0);
+
+ for (i = 0; i < 2; i++) {
+ struct nv50_miptree *mt = nv50_miptree(dest->resources[i]);
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ }
+ PUSH_KICK (push);
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video.c b/src/gallium/drivers/nouveau/nv50/nv98_video.c
new file mode 100644
index 00000000000..069481de207
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv98_video.h"
+
+#include "util/u_sampler.h"
+#include "util/u_format.h"
+
+static void
+nv98_decoder_decode_bitstream(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *video_target,
+ struct pipe_picture_desc *picture,
+ unsigned num_buffers,
+ const void *const *data,
+ const unsigned *num_bytes)
+{
+ struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder;
+ struct nouveau_vp3_video_buffer *target = (struct nouveau_vp3_video_buffer *)video_target;
+ uint32_t comm_seq = ++dec->fence_seq;
+ union pipe_desc desc;
+
+ unsigned vp_caps, is_ref, ret;
+ struct nouveau_vp3_video_buffer *refs[16] = {};
+
+ desc.base = picture;
+
+ assert(target->base.buffer_format == PIPE_FORMAT_NV12);
+
+ ret = nv98_decoder_bsp(dec, desc, target, comm_seq,
+ num_buffers, data, num_bytes,
+ &vp_caps, &is_ref, refs);
+
+ /* did we decode bitstream correctly? */
+ assert(ret == 2);
+
+ nv98_decoder_vp(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
+ nv98_decoder_ppp(dec, desc, target, comm_seq);
+}
+
+struct pipe_video_codec *
+nv98_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ)
+{
+ struct nouveau_screen *screen = &((struct nv50_context *)context)->screen->base;
+ struct nouveau_vp3_decoder *dec;
+ struct nouveau_pushbuf **push;
+ struct nv04_fifo nv04_data = {.vram = 0xbeef0201, .gart = 0xbeef0202};
+ union nouveau_bo_config cfg;
+
+ cfg.nv50.tile_mode = 0x20;
+ cfg.nv50.memtype = 0x70;
+
+ int ret, i;
+ uint32_t codec = 1, ppp_codec = 3;
+ uint32_t timeout;
+ u32 tmp_size = 0;
+
+ if (getenv("XVMC_VL"))
+ return vl_create_decoder(context, templ);
+
+ if (templ->entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+ debug_printf("%x\n", templ->entrypoint);
+ return NULL;
+ }
+
+ dec = CALLOC_STRUCT(nouveau_vp3_decoder);
+ if (!dec)
+ return NULL;
+ dec->client = screen->client;
+ dec->base = *templ;
+ nouveau_vp3_decoder_init_common(&dec->base);
+
+ dec->bsp_idx = 5;
+ dec->vp_idx = 6;
+ dec->ppp_idx = 7;
+
+ ret = nouveau_object_new(&screen->device->object, 0,
+ NOUVEAU_FIFO_CHANNEL_CLASS,
+ &nv04_data, sizeof(nv04_data), &dec->channel[0]);
+
+ if (!ret)
+ ret = nouveau_pushbuf_new(screen->client, dec->channel[0], 4,
+ 32 * 1024, true, &dec->pushbuf[0]);
+
+ for (i = 1; i < 3; ++i) {
+ dec->channel[i] = dec->channel[0];
+ dec->pushbuf[i] = dec->pushbuf[0];
+ }
+ push = dec->pushbuf;
+
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[0], 0x390b1, 0x85b1, NULL, 0, &dec->bsp);
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x85b2, NULL, 0, &dec->vp);
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x85b3, NULL, 0, &dec->ppp);
+ if (ret)
+ goto fail;
+
+ BEGIN_NV04(push[0], SUBC_BSP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push[0], dec->bsp->handle);
+
+ BEGIN_NV04(push[0], SUBC_BSP(0x180), 5);
+ for (i = 0; i < 5; i++)
+ PUSH_DATA (push[0], nv04_data.vram);
+
+ BEGIN_NV04(push[1], SUBC_VP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push[1], dec->vp->handle);
+
+ BEGIN_NV04(push[1], SUBC_VP(0x180), 6);
+ for (i = 0; i < 6; i++)
+ PUSH_DATA (push[1], nv04_data.vram);
+
+ BEGIN_NV04(push[2], SUBC_PPP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push[2], dec->ppp->handle);
+
+ BEGIN_NV04(push[2], SUBC_PPP(0x180), 5);
+ for (i = 0; i < 5; i++)
+ PUSH_DATA (push[2], nv04_data.vram);
+
+ dec->base.context = context;
+ dec->base.decode_bitstream = nv98_decoder_decode_bitstream;
+
+ for (i = 0; i < NOUVEAU_VP3_VIDEO_QDEPTH && !ret; ++i)
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
+ 0, 1 << 20, NULL, &dec->bsp_bo[i]);
+ if (!ret)
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
+ 0x100, 4 << 20, NULL, &dec->inter_bo[0]);
+ if (!ret)
+ nouveau_bo_ref(dec->inter_bo[0], &dec->inter_bo[1]);
+ if (ret)
+ goto fail;
+
+ switch (u_reduce_video_profile(templ->profile)) {
+ case PIPE_VIDEO_FORMAT_MPEG12: {
+ codec = 1;
+ assert(templ->max_references <= 2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_MPEG4: {
+ codec = 4;
+ tmp_size = mb(templ->height)*16 * mb(templ->width)*16;
+ assert(templ->max_references <= 2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_VC1: {
+ ppp_codec = codec = 2;
+ tmp_size = mb(templ->height)*16 * mb(templ->width)*16;
+ assert(templ->max_references <= 2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
+ codec = 3;
+ dec->tmp_stride = 16 * mb_half(templ->width) * nouveau_vp3_video_align(templ->height) * 3 / 2;
+ tmp_size = dec->tmp_stride * (templ->max_references + 1);
+ assert(templ->max_references <= 16);
+ break;
+ }
+ default:
+ fprintf(stderr, "invalid codec\n");
+ goto fail;
+ }
+
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
+ 0x4000, NULL, &dec->fw_bo);
+ if (ret)
+ goto fail;
+
+ ret = nouveau_vp3_load_firmware(dec, templ->profile, screen->device->chipset);
+ if (ret)
+ goto fw_fail;
+
+ if (codec != 3) {
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
+ 0x400, NULL, &dec->bitplane_bo);
+ if (ret)
+ goto fail;
+ }
+
+ dec->ref_stride = mb(templ->width)*16 * (mb_half(templ->height)*32 + nouveau_vp3_video_align(templ->height)/2);
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
+ dec->ref_stride * (templ->max_references+2) + tmp_size,
+ &cfg, &dec->ref_bo);
+ if (ret)
+ goto fail;
+
+ timeout = 0;
+
+ BEGIN_NV04(push[0], SUBC_BSP(0x200), 2);
+ PUSH_DATA (push[0], codec);
+ PUSH_DATA (push[0], timeout);
+
+ BEGIN_NV04(push[1], SUBC_VP(0x200), 2);
+ PUSH_DATA (push[1], codec);
+ PUSH_DATA (push[1], timeout);
+
+ BEGIN_NV04(push[2], SUBC_PPP(0x200), 2);
+ PUSH_DATA (push[2], ppp_codec);
+ PUSH_DATA (push[2], timeout);
+
+ ++dec->fence_seq;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART|NOUVEAU_BO_MAP,
+ 0, 0x1000, NULL, &dec->fence_bo);
+ if (ret)
+ goto fail;
+
+ nouveau_bo_map(dec->fence_bo, NOUVEAU_BO_RDWR, screen->client);
+ dec->fence_map = dec->fence_bo->map;
+ dec->fence_map[0] = dec->fence_map[4] = dec->fence_map[8] = 0;
+ dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));
+
+ /* So lets test if the fence is working? */
+ nouveau_pushbuf_space(push[0], 6, 1, 0);
+ PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
+ BEGIN_NV04(push[0], SUBC_BSP(0x240), 3);
+ PUSH_DATAh(push[0], dec->fence_bo->offset);
+ PUSH_DATA (push[0], dec->fence_bo->offset);
+ PUSH_DATA (push[0], dec->fence_seq);
+
+ BEGIN_NV04(push[0], SUBC_BSP(0x304), 1);
+ PUSH_DATA (push[0], 0);
+ PUSH_KICK (push[0]);
+
+ nouveau_pushbuf_space(push[1], 6, 1, 0);
+ PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
+ BEGIN_NV04(push[1], SUBC_VP(0x240), 3);
+ PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
+ PUSH_DATA (push[1], (dec->fence_bo->offset + 0x10));
+ PUSH_DATA (push[1], dec->fence_seq);
+
+ BEGIN_NV04(push[1], SUBC_VP(0x304), 1);
+ PUSH_DATA (push[1], 0);
+ PUSH_KICK (push[1]);
+
+ nouveau_pushbuf_space(push[2], 6, 1, 0);
+ PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
+ BEGIN_NV04(push[2], SUBC_PPP(0x240), 3);
+ PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push[2], (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push[2], dec->fence_seq);
+
+ BEGIN_NV04(push[2], SUBC_PPP(0x304), 1);
+ PUSH_DATA (push[2], 0);
+ PUSH_KICK (push[2]);
+
+ usleep(100);
+ while (dec->fence_seq > dec->fence_map[0] ||
+ dec->fence_seq > dec->fence_map[4] ||
+ dec->fence_seq > dec->fence_map[8]) {
+ debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]);
+ usleep(100);
+ }
+ debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]);
+#endif
+
+ return &dec->base;
+
+fw_fail:
+ debug_printf("Cannot create decoder without firmware..\n");
+ dec->base.destroy(&dec->base);
+ return NULL;
+
+fail:
+ debug_printf("Creation failed: %s (%i)\n", strerror(-ret), ret);
+ dec->base.destroy(&dec->base);
+ return NULL;
+}
+
+struct pipe_video_buffer *
+nv98_video_buffer_create(struct pipe_context *pipe,
+ const struct pipe_video_buffer *templat)
+{
+ return nouveau_vp3_video_buffer_create(
+ pipe, templat, NV50_RESOURCE_FLAG_VIDEO);
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video.h b/src/gallium/drivers/nouveau/nv50/nv98_video.h
new file mode 100644
index 00000000000..cec761df4ab
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv50_context.h"
+#include "nv50/nv50_screen.h"
+#include "nouveau_vp3_video.h"
+
+#include "vl/vl_decoder.h"
+#include "vl/vl_types.h"
+
+#include "util/u_video.h"
+
+extern unsigned
+nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target,
+ unsigned comm_seq, unsigned num_buffers,
+ const void *const *data, const unsigned *num_bytes,
+ unsigned *vp_caps, unsigned *is_ref,
+ struct nouveau_vp3_video_buffer *refs[16]);
+
+extern void
+nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target, unsigned comm_seq,
+ unsigned caps, unsigned is_ref,
+ struct nouveau_vp3_video_buffer *refs[16]);
+
+extern void
+nv98_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target, unsigned comm_seq);
diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
new file mode 100644
index 00000000000..97d4119b6d1
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv98_video.h"
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+static void dump_comm_bsp(struct comm *comm)
+{
+ unsigned idx = comm->bsp_cur_index & 0xf;
+ debug_printf("Cur seq: %x, bsp byte ofs: %x\n", comm->bsp_cur_index, comm->byte_ofs);
+ debug_printf("Status: %08x, pos: %08x\n", comm->status[idx], comm->pos[idx]);
+}
+#endif
+
+unsigned
+nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target,
+ unsigned comm_seq, unsigned num_buffers,
+ const void *const *data, const unsigned *num_bytes,
+ unsigned *vp_caps, unsigned *is_ref,
+ struct nouveau_vp3_video_buffer *refs[16])
+{
+ struct nouveau_pushbuf *push = dec->pushbuf[0];
+ enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
+ uint32_t bsp_addr, comm_addr, inter_addr;
+ uint32_t slice_size, bucket_size, ring_size;
+ uint32_t caps;
+ int ret;
+ struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
+ struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
+ unsigned fence_extra = 0;
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+ { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+#if NOUVEAU_VP3_DEBUG_FENCE
+ { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+ { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ };
+ int num_refs = sizeof(bo_refs)/sizeof(*bo_refs);
+
+ if (!dec->bitplane_bo)
+ num_refs--;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ fence_extra = 4;
+#endif
+
+ ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client);
+ if (ret) {
+ debug_printf("map failed: %i %s\n", ret, strerror(-ret));
+ return -1;
+ }
+
+ caps = nouveau_vp3_bsp(dec, desc, target, comm_seq,
+ num_buffers, data, num_bytes);
+
+ nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
+
+ nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 8) + fence_extra + 2, num_refs, 0);
+ nouveau_pushbuf_refn(push, bo_refs, num_refs);
+
+ bsp_addr = bsp_bo->offset >> 8;
+ inter_addr = inter_bo->offset >> 8;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ memset(dec->comm, 0, 0x200);
+ comm_addr = (dec->fence_bo->offset + COMM_OFFSET) >> 8;
+#else
+ comm_addr = bsp_addr + (COMM_OFFSET>>8);
+#endif
+
+ BEGIN_NV04(push, SUBC_BSP(0x700), 5);
+ PUSH_DATA (push, caps); // 700 cmd
+ PUSH_DATA (push, bsp_addr + 1); // 704 strparm_bsp
+ PUSH_DATA (push, bsp_addr + 7); // 708 str addr
+ PUSH_DATA (push, comm_addr); // 70c comm
+ PUSH_DATA (push, comm_seq); // 710 seq
+
+ if (codec != PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+ u32 bitplane_addr;
+ int mpeg12 = (codec == PIPE_VIDEO_FORMAT_MPEG12);
+
+ bitplane_addr = dec->bitplane_bo->offset >> 8;
+
+ nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size);
+ BEGIN_NV04(push, SUBC_BSP(0x400), mpeg12 ? 5 : 7);
+ PUSH_DATA (push, bsp_addr); // 400 picparm addr
+ PUSH_DATA (push, inter_addr); // 404 interparm addr
+ PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 408 interdata addr
+ PUSH_DATA (push, ring_size << 8); // 40c interdata_size
+ if (!mpeg12) {
+ PUSH_DATA (push, bitplane_addr); // 410 BITPLANE_DATA
+ PUSH_DATA (push, 0x400); // 414 BITPLANE_DATA_SIZE
+ }
+ PUSH_DATA (push, 0); // dma idx
+ } else {
+ nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
+ BEGIN_NV04(push, SUBC_BSP(0x400), 8);
+ PUSH_DATA (push, bsp_addr); // 400 picparm addr
+ PUSH_DATA (push, inter_addr); // 404 interparm addr
+ PUSH_DATA (push, slice_size << 8); // 408 interparm size?
+ PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 40c interdata addr
+ PUSH_DATA (push, ring_size << 8); // 410 interdata size
+ PUSH_DATA (push, inter_addr + slice_size); // 414 bucket?
+ PUSH_DATA (push, bucket_size << 8); // 418 bucket size? unshifted..
+ PUSH_DATA (push, 0); // 41c targets
+ // TODO: Double check 414 / 418 with nvidia trace
+ }
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ BEGIN_NV04(push, SUBC_BSP(0x240), 3);
+ PUSH_DATAh(push, dec->fence_bo->offset);
+ PUSH_DATA (push, dec->fence_bo->offset);
+ PUSH_DATA (push, dec->fence_seq);
+
+ BEGIN_NV04(push, SUBC_BSP(0x300), 1);
+ PUSH_DATA (push, 1);
+ PUSH_KICK (push);
+
+ {
+ unsigned spin = 0;
+ do {
+ usleep(100);
+ if ((spin++ & 0xff) == 0xff) {
+ debug_printf("b%u: %u\n", dec->fence_seq, dec->fence_map[0]);
+ dump_comm_bsp(dec->comm);
+ }
+ } while (dec->fence_seq > dec->fence_map[0]);
+ }
+
+ dump_comm_bsp(dec->comm);
+ return dec->comm->status[comm_seq & 0xf];
+#else
+ BEGIN_NV04(push, SUBC_BSP(0x300), 1);
+ PUSH_DATA (push, 0);
+ PUSH_KICK (push);
+ return 2;
+#endif
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c
new file mode 100644
index 00000000000..6b0b7148dcb
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv98_video.h"
+
+static void
+nv98_decoder_setup_ppp(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target, uint32_t low700) {
+ struct nouveau_pushbuf *push = dec->pushbuf[2];
+
+ uint32_t stride_in = mb(dec->base.width);
+ uint32_t stride_out = mb(target->resources[0]->width0);
+ uint32_t dec_h = mb(dec->base.height);
+ uint32_t dec_w = mb(dec->base.width);
+ uint64_t in_addr;
+ uint32_t y2, cbcr, cbcr2, i;
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+ { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+ { dec->ref_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+#if NOUVEAU_VP3_DEBUG_FENCE
+ { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+ };
+ unsigned num_refs = sizeof(bo_refs)/sizeof(*bo_refs);
+
+ for (i = 0; i < 2; ++i) {
+ struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i];
+ bo_refs[i].bo = mt->base.bo;
+ }
+
+ nouveau_pushbuf_refn(push, bo_refs, num_refs);
+ nouveau_vp3_ycbcr_offsets(dec, &y2, &cbcr, &cbcr2);
+
+ BEGIN_NV04(push, SUBC_PPP(0x700), 10);
+ in_addr = nouveau_vp3_video_addr(dec, target) >> 8;
+
+ PUSH_DATA (push, (stride_out << 24) | (stride_out << 16) | low700); // 700
+ PUSH_DATA (push, (stride_in << 24) | (stride_in << 16) | (dec_h << 8) | dec_w); // 704
+ assert(dec_w == stride_in);
+
+ /* Input: */
+ PUSH_DATA (push, in_addr); // 708
+ PUSH_DATA (push, in_addr + y2); // 70c
+ PUSH_DATA (push, in_addr + cbcr); // 710
+ PUSH_DATA (push, in_addr + cbcr2); // 714
+
+ for (i = 0; i < 2; ++i) {
+ struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i];
+
+ PUSH_DATA (push, mt->base.address >> 8);
+ PUSH_DATA (push, (mt->base.address + mt->total_size/2) >> 8);
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ }
+}
+
+static uint32_t
+nv98_decoder_vc1_ppp(struct nouveau_vp3_decoder *dec, struct pipe_vc1_picture_desc *desc, struct nouveau_vp3_video_buffer *target) {
+ struct nouveau_pushbuf *push = dec->pushbuf[2];
+
+ nv98_decoder_setup_ppp(dec, target, 0x1412);
+ assert(!desc->deblockEnable);
+ assert(!(dec->base.width & 0xf));
+ assert(!(dec->base.height & 0xf));
+
+ BEGIN_NV04(push, SUBC_PPP(0x400), 1);
+ PUSH_DATA (push, desc->pquant << 11);
+
+ // 728 = wtf?
+ return 0x10;
+}
+
+void
+nv98_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct nouveau_vp3_video_buffer *target, unsigned comm_seq) {
+ enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
+ struct nouveau_pushbuf *push = dec->pushbuf[2];
+ unsigned ppp_caps = 0x10;
+ unsigned fence_extra = 0;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ fence_extra = 4;
+#endif
+
+ nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0);
+
+ switch (codec) {
+ case PIPE_VIDEO_FORMAT_MPEG12: {
+ unsigned mpeg2 = dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1;
+ nv98_decoder_setup_ppp(dec, target, 0x1410 | mpeg2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_MPEG4: nv98_decoder_setup_ppp(dec, target, 0x1414); break;
+ case PIPE_VIDEO_FORMAT_VC1: ppp_caps = nv98_decoder_vc1_ppp(dec, desc.vc1, target); break;
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC: nv98_decoder_setup_ppp(dec, target, 0x1413); break;
+ default: assert(0);
+ }
+ BEGIN_NV04(push, SUBC_PPP(0x734), 2);
+ PUSH_DATA (push, comm_seq);
+ PUSH_DATA (push, ppp_caps);
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ BEGIN_NV04(push, SUBC_PPP(0x240), 3);
+ PUSH_DATAh(push, (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push, (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push, dec->fence_seq);
+
+ BEGIN_NV04(push, SUBC_PPP(0x300), 1);
+ PUSH_DATA (push, 1);
+ PUSH_KICK (push);
+
+ {
+ unsigned spin = 0;
+
+ do {
+ usleep(100);
+ if ((spin++ & 0xff) == 0xff)
+ debug_printf("p%u: %u\n", dec->fence_seq, dec->fence_map[8]);
+ } while (dec->fence_seq > dec->fence_map[8]);
+ }
+#else
+ BEGIN_NV04(push, SUBC_PPP(0x300), 1);
+ PUSH_DATA (push, 0);
+ PUSH_KICK (push);
+#endif
+}
diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c
new file mode 100644
index 00000000000..9b756ea73f5
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst, Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nv50/nv98_video.h"
+#include <sys/mman.h>
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq,
+ struct nouveau_bo *inter_bo, unsigned slice_size)
+{
+ unsigned i, idx = comm->pvp_cur_index & 0xf;
+ debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
+#if 0
+ debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
+ debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);
+
+ for (i = 0; i != comm->irq_index; ++i)
+ debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
+ for (i = 0; i != comm->parse_endpos_index; ++i)
+ debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
+#endif
+ debug_printf("mb_y = %u\n", comm->mb_y[idx]);
+ if (comm->status_vp[idx] == 1)
+ return;
+
+ if ((comm->pvp_stage & 0xff) != 0xff) {
+ unsigned *map;
+ assert(nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client) >= 0);
+ map = inter_bo->map;
+ for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
+ debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
+ }
+ munmap(inter_bo->map, inter_bo->size);
+ inter_bo->map = NULL;
+ }
+ assert((comm->pvp_stage & 0xff) == 0xff);
+}
+#endif
+
+static void
+nv98_decoder_kick_ref(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target)
+{
+ dec->refs[target->valid_ref].vidbuf = NULL;
+ dec->refs[target->valid_ref].last_used = 0;
+// debug_printf("Unreffed %p\n", target);
+}
+
+void
+nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target, unsigned comm_seq,
+ unsigned caps, unsigned is_ref,
+ struct nouveau_vp3_video_buffer *refs[16])
+{
+ struct nouveau_pushbuf *push = dec->pushbuf[1];
+ uint32_t bsp_addr, comm_addr, inter_addr, ucode_addr, pic_addr[17], last_addr, null_addr;
+ uint32_t slice_size, bucket_size, ring_size, i;
+ enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
+ struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
+ struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
+ u32 fence_extra = 0, codec_extra = 0;
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+ { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+ { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+#if NOUVEAU_VP3_DEBUG_FENCE
+ { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+ { dec->fw_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+ };
+ int num_refs = sizeof(bo_refs)/sizeof(*bo_refs) - !dec->fw_bo;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ fence_extra = 4;
+#endif
+
+ if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+ nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
+ codec_extra += 2;
+ } else
+ nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size);
+
+ if (dec->base.max_references > 2)
+ codec_extra += 1 + (dec->base.max_references - 2);
+
+ pic_addr[16] = nouveau_vp3_video_addr(dec, target) >> 8;
+ last_addr = null_addr = nouveau_vp3_video_addr(dec, NULL) >> 8;
+
+ for (i = 0; i < dec->base.max_references; ++i) {
+ if (!refs[i])
+ pic_addr[i] = last_addr;
+ else if (dec->refs[refs[i]->valid_ref].vidbuf == refs[i])
+ last_addr = pic_addr[i] = nouveau_vp3_video_addr(dec, refs[i]) >> 8;
+ else
+ pic_addr[i] = null_addr;
+ }
+ if (!is_ref)
+ nv98_decoder_kick_ref(dec, target);
+
+ nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) +
+ 6 + codec_extra + fence_extra + 2, num_refs, 0);
+
+ nouveau_pushbuf_refn(push, bo_refs, num_refs);
+
+ bsp_addr = bsp_bo->offset >> 8;
+#if NOUVEAU_VP3_DEBUG_FENCE
+ comm_addr = (dec->fence_bo->offset + COMM_OFFSET)>>8;
+#else
+ comm_addr = bsp_addr + (COMM_OFFSET>>8);
+#endif
+ inter_addr = inter_bo->offset >> 8;
+ if (dec->fw_bo)
+ ucode_addr = dec->fw_bo->offset >> 8;
+ else
+ ucode_addr = 0;
+
+ BEGIN_NV04(push, SUBC_VP(0x700), 7);
+ PUSH_DATA (push, caps); // 700
+ PUSH_DATA (push, comm_seq); // 704
+ PUSH_DATA (push, 0); // 708 fuc targets, ignored for nv98
+ PUSH_DATA (push, dec->fw_sizes); // 70c
+ PUSH_DATA (push, bsp_addr+(VP_OFFSET>>8)); // 710 picparm_addr
+ PUSH_DATA (push, inter_addr); // 714 inter_parm
+ PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 718 inter_data_ofs
+
+ if (bucket_size) {
+ uint64_t tmpimg_addr = dec->ref_bo->offset + dec->ref_stride * (dec->base.max_references+2);
+
+ BEGIN_NV04(push, SUBC_VP(0x71c), 2);
+ PUSH_DATA (push, tmpimg_addr >> 8); // 71c
+ PUSH_DATA (push, inter_addr + slice_size); // 720 bucket_ofs
+ }
+
+ BEGIN_NV04(push, SUBC_VP(0x724), 5);
+ PUSH_DATA (push, comm_addr); // 724
+ PUSH_DATA (push, ucode_addr); // 728
+ PUSH_DATA (push, pic_addr[16]); // 734
+ PUSH_DATA (push, pic_addr[0]); // 72c
+ PUSH_DATA (push, pic_addr[1]); // 730
+
+ if (dec->base.max_references > 2) {
+ int i;
+
+ BEGIN_NV04(push, SUBC_VP(0x400), dec->base.max_references - 2);
+ for (i = 2; i < dec->base.max_references; ++i) {
+ assert(0x400 + (i - 2) * 4 < 0x438);
+ PUSH_DATA (push, pic_addr[i]);
+ }
+ }
+
+ if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+ BEGIN_NV04(push, SUBC_VP(0x438), 1);
+ PUSH_DATA (push, desc.h264->slice_count);
+ }
+
+ //debug_printf("Decoding %08lx with %08lx and %08lx\n", pic_addr[16], pic_addr[0], pic_addr[1]);
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ BEGIN_NV04(push, SUBC_VP(0x240), 3);
+ PUSH_DATAh(push, (dec->fence_bo->offset + 0x10));
+ PUSH_DATA (push, (dec->fence_bo->offset + 0x10));
+ PUSH_DATA (push, dec->fence_seq);
+
+ BEGIN_NV04(push, SUBC_VP(0x300), 1);
+ PUSH_DATA (push, 1);
+ PUSH_KICK(push);
+
+ {
+ unsigned spin = 0;
+ do {
+ usleep(100);
+ if ((spin++ & 0xff) == 0xff) {
+ debug_printf("v%u: %u\n", dec->fence_seq, dec->fence_map[4]);
+ dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
+ }
+ } while (dec->fence_seq > dec->fence_map[4]);
+ }
+ dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
+#else
+ BEGIN_NV04(push, SUBC_VP(0x300), 1);
+ PUSH_DATA (push, 0);
+ PUSH_KICK (push);
+#endif
+}