summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ben Skeggs <bskeggs@redhat.com> 2020-06-07 09:52:46 +1000
committer: Marge Bot <eric+marge@anholt.net> 2020-06-10 22:52:42 +0000
commit: 268dc60d3a091bc563e319c38e74cc10e544aa8a (patch)
tree: a99255e6985c066cd87a6f23234c1bc4ed4520ac
parent: 839aeffb49ef989d2eab3a62e04f1cb8b907d7d7 (diff)
nvc0: initial support for gv100
v2:
- remove unnecessary MAX2()
- add proper method definitions

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Acked-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>
-rw-r--r-- src/gallium/drivers/nouveau/Makefile.sources 2
-rw-r--r-- src/gallium/drivers/nouveau/meson.build 2
-rw-r--r-- src/gallium/drivers/nouveau/nouveau_screen.c 6
-rw-r--r-- src/gallium/drivers/nouveau/nv_object.xml.h 2
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h 245
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h 3
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_program.c 5
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 57
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c 10
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nve4_compute.c 111
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/qmd.h 1
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/qmdc3c0.c 168
-rw-r--r-- src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c 1
13 files changed, 578 insertions, 35 deletions
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index 313f41c79bd..9de8168fbd9 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -153,10 +153,12 @@ NVC0_CODEGEN_SOURCES := \
NVC0_C_SOURCES := \
nvc0/cla0c0qmd.h \
nvc0/clc0c0qmd.h \
+ nvc0/clc3c0qmd.h \
nvc0/drf.h \
nvc0/qmd.h \
nvc0/qmda0c0.c \
nvc0/qmdc0c0.c \
+ nvc0/qmdc3c0.c \
nvc0/gm107_texture.xml.h \
nvc0/nvc0_3d.xml.h \
nvc0/nvc0_compute.c \
diff --git a/src/gallium/drivers/nouveau/meson.build b/src/gallium/drivers/nouveau/meson.build
index 9d93a5ef42c..d0cfbe76478 100644
--- a/src/gallium/drivers/nouveau/meson.build
+++ b/src/gallium/drivers/nouveau/meson.build
@@ -169,10 +169,12 @@ files_libnouveau = files(
'codegen/nv50_ir_target_nvc0.h',
'nvc0/cla0c0qmd.h',
'nvc0/clc0c0qmd.h',
+ 'nvc0/clc3c0qmd.h',
'nvc0/drf.h',
'nvc0/qmd.h',
'nvc0/qmda0c0.c',
'nvc0/qmdc0c0.c',
+ 'nvc0/qmdc3c0.c',
'nvc0/gm107_texture.xml.h',
'nvc0/nvc0_3d.xml.h',
'nvc0/nvc0_compute.c',
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index 7f90784b8ed..702d88b6c93 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -188,7 +188,11 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
if (nv_dbg)
nouveau_mesa_debug = atoi(nv_dbg);
- screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false);
+ if (dev->chipset < 0x140)
+ screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false);
+ else
+ screen->prefer_nir = true;
+
screen->force_enable_cl = debug_get_bool_option("NOUVEAU_ENABLE_CL", false);
if (screen->force_enable_cl)
glsl_type_singleton_init_or_ref();
diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
index 664bfae9f64..0c1337028f3 100644
--- a/src/gallium/drivers/nouveau/nv_object.xml.h
+++ b/src/gallium/drivers/nouveau/nv_object.xml.h
@@ -195,6 +195,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GM200_3D_CLASS 0x0000b197
#define GP100_3D_CLASS 0x0000c097
#define GP102_3D_CLASS 0x0000c197
+#define GV100_3D_CLASS 0x0000c397
#define NV50_2D_CLASS 0x0000502d
#define NVC0_2D_CLASS 0x0000902d
#define NV50_COMPUTE_CLASS 0x000050c0
@@ -207,6 +208,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GM200_COMPUTE_CLASS 0x0000b1c0
#define GP100_COMPUTE_CLASS 0x0000c0c0
#define GP104_COMPUTE_CLASS 0x0000c1c0
+#define GV100_COMPUTE_CLASS 0x0000c3c0
#define NV84_CRYPT_CLASS 0x000074c1
#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5
diff --git a/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h b/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h
new file mode 100644
index 00000000000..588cc639d32
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h
@@ -0,0 +1,245 @@
+/*******************************************************************************
+ Copyright (c) 2001-2010 NVIDIA Corporation
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to
+ deal in the Software without restriction, including without limitation the
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ sell copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+
+*******************************************************************************/
+
+/* AUTO GENERATED FILE -- DO NOT EDIT */
+
+#ifndef __CLC3C0QMD_H__
+#define __CLC3C0QMD_H__
+
+/*
+** Queue Meta Data, Version 02_02
+ */
+
+// The below C preprocessor definitions describe "multi-word" structures, where
+// fields may have bit numbers beyond 32. For example, MW(127:96) means
+// the field is in bits 0-31 of word number 3 of the structure. The "MW(X:Y)"
+// syntax is to distinguish from similar "X:Y" single-word definitions: the
+// macros historically used for single-word definitions would fail with
+// multi-word definitions.
+//
+// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel
+// interface layer of nvidia.ko for an example of how to manipulate
+// these MW(X:Y) definitions.
+
+#define NVC3C0_QMDV02_02_OUTER_PUT MW(30:0)
+#define NVC3C0_QMDV02_02_OUTER_OVERFLOW MW(31:31)
+#define NVC3C0_QMDV02_02_OUTER_GET MW(62:32)
+#define NVC3C0_QMDV02_02_OUTER_STICKY_OVERFLOW MW(63:63)
+#define NVC3C0_QMDV02_02_INNER_GET MW(94:64)
+#define NVC3C0_QMDV02_02_INNER_OVERFLOW MW(95:95)
+#define NVC3C0_QMDV02_02_INNER_PUT MW(126:96)
+#define NVC3C0_QMDV02_02_INNER_STICKY_OVERFLOW MW(127:127)
+#define NVC3C0_QMDV02_02_QMD_GROUP_ID MW(133:128)
+#define NVC3C0_QMDV02_02_SM_GLOBAL_CACHING_ENABLE MW(134:134)
+#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION MW(135:135)
+#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_IS_QUEUE MW(136:136)
+#define NVC3C0_QMDV02_02_IS_QUEUE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_IS_QUEUE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(137:137)
+#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0 MW(138:138)
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1 MW(139:139)
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS MW(140:140)
+#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE MW(141:141)
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE MW(142:142)
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_QUEUE 0x00000000
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_GRID 0x00000001
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY MW(143:143)
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_QMD_RESERVED_B MW(159:144)
+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_SIZE MW(184:160)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_C MW(185:185)
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE MW(186:186)
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(187:187)
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE MW(188:188)
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE MW(189:189)
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE MW(190:190)
+#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE MW(191:191)
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH_RESUME MW(223:192)
+#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT_RESUME MW(239:224)
+#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH_RESUME MW(255:240)
+#define NVC3C0_QMDV02_02_PROGRAM_OFFSET MW(287:256)
+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288)
+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_D MW(335:328)
+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336)
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_ID MW(357:352)
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358)
+#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE MW(366:366)
+#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367)
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE MW(369:368)
+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_NONE 0x00000000
+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001
+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003
+#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS MW(370:370)
+#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371)
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT MW(378:378)
+#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT__32 0x00000000
+#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001
+#define NVC3C0_QMDV02_02_SAMPLER_INDEX MW(382:382)
+#define NVC3C0_QMDV02_02_SAMPLER_INDEX_INDEPENDENTLY 0x00000000
+#define NVC3C0_QMDV02_02_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001
+#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH MW(415:384)
+#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT MW(431:416)
+#define NVC3C0_QMDV02_02_QMD_RESERVED13A MW(447:432)
+#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH MW(463:448)
+#define NVC3C0_QMDV02_02_QMD_RESERVED14A MW(479:464)
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_POINTER MW(511:480)
+#define NVC3C0_QMDV02_02_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512)
+#define NVC3C0_QMDV02_02_COALESCE_WAITING_PERIOD MW(529:522)
+#define NVC3C0_QMDV02_02_SHARED_MEMORY_SIZE MW(561:544)
+#define NVC3C0_QMDV02_02_MIN_SM_CONFIG_SHARED_MEM_SIZE MW(568:562)
+#define NVC3C0_QMDV02_02_MAX_SM_CONFIG_SHARED_MEM_SIZE MW(575:569)
+#define NVC3C0_QMDV02_02_QMD_VERSION MW(579:576)
+#define NVC3C0_QMDV02_02_QMD_MAJOR_VERSION MW(583:580)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_H MW(591:584)
+#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION0 MW(607:592)
+#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION1 MW(623:608)
+#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION2 MW(639:624)
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1))
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_REGISTER_COUNT_V MW(656:648)
+#define NVC3C0_QMDV02_02_TARGET_SM_CONFIG_SHARED_MEM_SIZE MW(663:657)
+#define NVC3C0_QMDV02_02_FREE_CTA_SLOTS_EMPTY_SM MW(671:664)
+#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_LOWER MW(703:672)
+#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_UPPER MW(735:704)
+#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_LOWER MW(767:736)
+#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_UPPER MW(775:768)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_J MW(783:776)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP MW(790:788)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_INC 0x00000003
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_AND 0x00000005
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_OR 0x00000006
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007
+#define NVC3C0_QMDV02_02_QMD_RESERVED_K MW(791:791)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT MW(793:792)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE MW(794:794)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE MW(799:799)
+#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE0_PAYLOAD MW(831:800)
+#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_LOWER MW(863:832)
+#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_UPPER MW(871:864)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_L MW(879:872)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP MW(886:884)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_INC 0x00000003
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_AND 0x00000005
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_OR 0x00000006
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007
+#define NVC3C0_QMDV02_02_QMD_RESERVED_M MW(887:887)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT MW(889:888)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE MW(890:890)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE MW(895:895)
+#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE1_PAYLOAD MW(927:896)
+#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_LOW_SIZE MW(951:928)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_N MW(954:952)
+#define NVC3C0_QMDV02_02_BARRIER_COUNT MW(959:955)
+#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(983:960)
+#define NVC3C0_QMDV02_02_REGISTER_COUNT MW(991:984)
+#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1015:992)
+#define NVC3C0_QMDV02_02_SASS_VERSION MW(1023:1016)
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_LOWER(i) MW((1055+(i)*64):(1024+(i)*64))
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_UPPER(i) MW((1072+(i)*64):(1056+(i)*64))
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((1073+(i)*64):(1073+(i)*64))
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE(i) MW((1074+(i)*64):(1074+(i)*64))
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_SIZE_SHIFTED4(i) MW((1087+(i)*64):(1075+(i)*64))
+#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_LOWER MW(1567:1536)
+#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_UPPER MW(1584:1568)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_S MW(1599:1585)
+#define NVC3C0_QMDV02_02_HW_ONLY_INNER_GET MW(1630:1600)
+#define NVC3C0_QMDV02_02_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1631:1631)
+#define NVC3C0_QMDV02_02_HW_ONLY_INNER_PUT MW(1662:1632)
+#define NVC3C0_QMDV02_02_HW_ONLY_SCG_TYPE MW(1663:1663)
+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1693:1664)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_Q MW(1694:1694)
+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1695:1695)
+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1727:1696)
+#define NVC3C0_QMDV02_02_QMD_SPARE_G MW(1759:1728)
+#define NVC3C0_QMDV02_02_QMD_SPARE_H MW(1791:1760)
+#define NVC3C0_QMDV02_02_QMD_SPARE_I MW(1823:1792)
+#define NVC3C0_QMDV02_02_QMD_SPARE_J MW(1855:1824)
+#define NVC3C0_QMDV02_02_QMD_SPARE_K MW(1887:1856)
+#define NVC3C0_QMDV02_02_QMD_SPARE_L MW(1919:1888)
+#define NVC3C0_QMDV02_02_QMD_SPARE_M MW(1951:1920)
+#define NVC3C0_QMDV02_02_QMD_SPARE_N MW(1983:1952)
+#define NVC3C0_QMDV02_02_DEBUG_ID_UPPER MW(2015:1984)
+#define NVC3C0_QMDV02_02_DEBUG_ID_LOWER MW(2047:2016)
+
+
+
+#endif // #ifndef __CLC3C0QMD_H__
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
index 221bab3105b..1c5a8dc0b1f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
@@ -1787,6 +1787,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_SP_UNK14__ESIZE 0x00000004
#define NVC0_3D_SP_UNK14__LEN 0x00000004
+#define GV100_3D_SP_ADDRESS_HIGH(i0) (0x00002014 + 0x40*(i0))
+#define GV100_3D_SP_ADDRESS_LOW(i0) (0x00002018 + 0x40*(i0))
+
#define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0))
#define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010
#define NVC0_3D_TEX_LIMITS__LEN 0x00000005
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 4f3d1cd5b3b..b9fff341f28 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -645,7 +645,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
prog->code_size = info->bin.codeSize;
prog->relocs = info->bin.relocData;
prog->fixups = info->bin.fixupData;
- prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
+ if (info->target >= NVISA_GV100_CHIPSET)
+ prog->num_gprs = MIN2(info->bin.maxGPR + 5, 256); //XXX: why?
+ else
+ prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
prog->cp.smem_size = info->bin.smemSize;
prog->num_barriers = info->numBarriers;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 64a22104a1f..2807b59a4fd 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -444,8 +444,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_PREFERRED_IR:
return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_SUPPORTED_IRS: {
- uint32_t irs = 1 << PIPE_SHADER_IR_TGSI |
- 1 << PIPE_SHADER_IR_NIR;
+ uint32_t irs = 1 << PIPE_SHADER_IR_NIR |
+ ((class_3d >= GV100_3D_CLASS) ? 0 : 1 << PIPE_SHADER_IR_TGSI);
if (screen->force_enable_cl)
irs |= 1 << PIPE_SHADER_IR_NIR_SERIALIZED;
return irs;
@@ -468,6 +468,14 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
return shader != PIPE_SHADER_FRAGMENT;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+ /* HW doesn't support indirect addressing of fragment program inputs
+ * on Volta. The binary driver generates a function to handle every
+ * possible indirection, and indirectly calls the function to handle
+ * this instead.
+ */
+ if (class_3d >= GV100_3D_CLASS)
+ return shader != PIPE_SHADER_FRAGMENT;
+ return 1;
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
return 1;
@@ -731,8 +739,10 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
BEGIN_NVC0(push, SUBC_3D(0x10ec), 2);
PUSH_DATA (push, 0xff);
PUSH_DATA (push, 0xff);
- BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
- PUSH_DATA (push, 0x3f);
+ if (obj_class < GV100_3D_CLASS) {
+ BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
+ PUSH_DATA (push, 0x3f);
+ }
BEGIN_NVC0(push, SUBC_3D(0x16a8), 1);
PUSH_DATA (push, (3 << 16) | 3);
@@ -764,8 +774,10 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
BEGIN_NVC0(push, SUBC_3D(0x0300), 1);
PUSH_DATA (push, 3);
- BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
- PUSH_DATA (push, 0x3fffff);
+ if (obj_class < GV100_3D_CLASS) {
+ BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
+ PUSH_DATA (push, 0x3fffff);
+ }
BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1);
PUSH_DATA (push, 1);
BEGIN_NVC0(push, SUBC_3D(0x19c0), 1);
@@ -825,6 +837,7 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
case 0x110:
case 0x120:
case 0x130:
+ case 0x140:
return nve4_screen_compute_setup(screen, screen->base.pushbuf);
default:
return -1;
@@ -896,13 +909,15 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size)
nouveau_heap_init(&screen->text_heap, 0, size - 0x100);
/* update the code segment setup */
- BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->text->offset);
- PUSH_DATA (push, screen->text->offset);
- if (screen->compute) {
- BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
+ if (screen->eng3d->oclass < GV100_3D_CLASS) {
+ BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->text->offset);
PUSH_DATA (push, screen->text->offset);
+ if (screen->compute) {
+ BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->text->offset);
+ PUSH_DATA (push, screen->text->offset);
+ }
}
return 0;
@@ -981,6 +996,7 @@ nvc0_screen_create(struct nouveau_device *dev)
case 0x110:
case 0x120:
case 0x130:
+ case 0x140:
break;
default:
return NULL;
@@ -1047,16 +1063,18 @@ nvc0_screen_create(struct nouveau_device *dev)
screen->base.fence.emit = nvc0_screen_fence_emit;
screen->base.fence.update = nvc0_screen_fence_update;
+ if (dev->chipset < 0x140) {
+ ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e,
+ NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw);
+ if (ret)
+ FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret);
- ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e,
- NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw);
- if (ret)
- FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret);
-
- BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
- PUSH_DATA (push, screen->nvsw->handle);
+ BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->nvsw->handle);
+ }
switch (dev->chipset & ~0xf) {
+ case 0x140:
case 0x130:
case 0x120:
case 0x110:
@@ -1110,6 +1128,9 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, screen->fence.bo->offset + 16);
switch (dev->chipset & ~0xf) {
+ case 0x140:
+ obj_class = GV100_3D_CLASS;
+ break;
case 0x130:
switch (dev->chipset) {
case 0x130:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 02f1c6fba8f..490026b2c00 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -70,8 +70,14 @@ nvc0_program_sp_start_id(struct nvc0_context *nvc0, int stage,
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1);
- PUSH_DATA (push, prog->code_base);
+ if (nvc0->screen->eng3d->oclass < GV100_3D_CLASS) {
+ BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1);
+ PUSH_DATA (push, prog->code_base);
+ } else {
+ BEGIN_NVC0(push, SUBC_3D(GV100_3D_SP_ADDRESS_HIGH(stage)), 2);
+ PUSH_DATAh(push, nvc0->screen->text->offset + prog->code_base);
+ PUSH_DATA (push, nvc0->screen->text->offset + prog->code_base);
+ }
}
void
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 58f29213dad..3a3f0a926de 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -31,11 +31,14 @@
#include "qmd.h"
#include "cla0c0qmd.h"
#include "clc0c0qmd.h"
+#include "clc3c0qmd.h"
#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a)
#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a)
#define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a)
#define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a)
+#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a)
+#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a)
int
nve4_screen_compute_setup(struct nvc0_screen *screen,
@@ -49,6 +52,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
uint64_t address;
switch (dev->chipset & ~0xf) {
+ case 0x140:
+ obj_class = GV100_COMPUTE_CLASS;
+ break;
case 0x100:
case 0xf0:
obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
@@ -92,24 +98,35 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
PUSH_DATAh(push, screen->tls->size / screen->mp_count);
PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
PUSH_DATA (push, 0xff);
- BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);
- PUSH_DATAh(push, screen->tls->size / screen->mp_count);
- PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
- PUSH_DATA (push, 0xff);
+ if (obj_class < GV100_COMPUTE_CLASS) {
+ BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);
+ PUSH_DATAh(push, screen->tls->size / screen->mp_count);
+ PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
+ PUSH_DATA (push, 0xff);
+ }
/* Unified address space ? Who needs that ? Certainly not OpenCL.
*
* FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
* accessible. We cannot prevent that at the moment, so expect failure.
*/
- BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
- PUSH_DATA (push, 0xff << 24);
- BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
- PUSH_DATA (push, 0xfe << 24);
-
- BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->text->offset);
- PUSH_DATA (push, screen->text->offset);
+ if (obj_class < GV100_COMPUTE_CLASS) {
+ BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
+ PUSH_DATA (push, 0xff << 24);
+ BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
+ PUSH_DATA (push, 0xfe << 24);
+
+ BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->text->offset);
+ PUSH_DATA (push, screen->text->offset);
+ } else {
+ BEGIN_NVC0(push, SUBC_CP(0x2a0), 2);
+ PUSH_DATAh(push, 0xfeULL << 24);
+ PUSH_DATA (push, 0xfeULL << 24);
+ BEGIN_NVC0(push, SUBC_CP(0x7b0), 2);
+ PUSH_DATAh(push, 0xffULL << 24);
+ PUSH_DATA (push, 0xffULL << 24);
+ }
BEGIN_NVC0(push, SUBC_CP(0x0310), 1);
PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);
@@ -717,6 +734,69 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,
nve4_compute_setup_buf_cb(nvc0, true, qmd);
}
+static int
+gv100_sm_config_smem_size(u32 size)
+{
+ if (size > 64 * 1024) size = 96 * 1024;
+ else if (size > 32 * 1024) size = 64 * 1024;
+ else if (size > 16 * 1024) size = 32 * 1024;
+ else if (size > 8 * 1024) size = 16 * 1024;
+ else size = 8 * 1024;
+ return (size / 4096) + 1;
+}
+
+static void
+gv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd,
+ const struct pipe_grid_info *info)
+{
+ struct nvc0_program *cp = nvc0->compprog;
+ struct nvc0_screen *screen = nvc0->screen;
+ uint64_t entry =
+ screen->text->offset + nvc0_program_symbol_offset(cp, info->pc);
+
+ NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
+ NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
+ NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, VIA_HEADER_INDEX);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE,
+ align(cp->cp.smem_size, 0x100));
+ NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
+ gv100_sm_config_smem_size(8 * 1024));
+ NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
+ gv100_sm_config_smem_size(96 * 1024));
+ NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
+ gv100_sm_config_smem_size(cp->cp.smem_size));
+
+ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, cp->num_gprs);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);
+
+ // Only bind user uniforms and the driver constant buffer through the
+ // launch descriptor because UBOs are sticked to the driver cb to avoid the
+ // limitation of 8 CBs.
+ if (nvc0->constbuf[5][0].user || cp->parm_size) {
+ gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,
+ NVC0_CB_USR_INFO(5), 1 << 16);
+
+ // Later logic will attempt to bind a real buffer at position 0. That
+ // should not happen if we've bound a user buffer.
+ assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
+ }
+ gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,
+ NVC0_CB_AUX_INFO(5), 1 << 11);
+
+ nve4_compute_setup_buf_cb(nvc0, true, qmd);
+
+ NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, entry & 0xffffffff);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, entry >> 32);
+}
+
static inline void *
nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
struct nouveau_bo **pbo, uint64_t *pgpuaddr)
@@ -787,6 +867,9 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
if (ret)
goto out;
+ if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS)
+ gv100_compute_setup_launch_desc(nvc0, desc, info);
+ else
if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
gp100_compute_setup_launch_desc(nvc0, desc, info);
else
@@ -797,6 +880,9 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
#ifndef NDEBUG
if (debug_get_num_option("NV50_PROG_DEBUG", 0)) {
debug_printf("Queue Meta Data:\n");
+ if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS)
+ NVC3C0QmdDump_V02_02(desc);
+ else
if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
NVC0C0QmdDump_V02_01(desc);
else
@@ -931,7 +1017,6 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
}
-
#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
static void
nve4_compute_trap_info(struct nvc0_context *nvc0)
diff --git a/src/gallium/drivers/nouveau/nvc0/qmd.h b/src/gallium/drivers/nouveau/nvc0/qmd.h
index 50db3cb5cf9..86c290fe836 100644
--- a/src/gallium/drivers/nouveau/nvc0/qmd.h
+++ b/src/gallium/drivers/nouveau/nvc0/qmd.h
@@ -64,4 +64,5 @@
void NVA0C0QmdDump_V00_06(uint32_t *);
void NVC0C0QmdDump_V02_01(uint32_t *);
+void NVC3C0QmdDump_V02_02(uint32_t *);
#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c b/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c
new file mode 100644
index 00000000000..c9bd8966114
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "qmd.h"
+#include "clc3c0qmd.h"
+
+#define NVC3C0_QMDV02_02_VAL(a...) NVQMD_VAL(NVC3C0, QMDV02_02, ##a) /* forwards to NVQMD_VAL with NVC3C0 and QMDV02_02 as fixed leading arguments */
+#define NVC3C0_QMDV02_02_DEF(a...) NVQMD_DEF(NVC3C0, QMDV02_02, ##a) /* same forwarding, to NVQMD_DEF */
+#define NVC3C0_QMDV02_02_IDX(a...) NVQMD_IDX(NVC3C0, QMDV02_02, ##a) /* same forwarding, to NVQMD_IDX */
+
+void /* Debug dump of one QMD (V02_02 layout) launch descriptor. */
+NVC3C0QmdDump_V02_02(uint32_t *qmd) /* qmd: descriptor words handed unchanged to every helper call below */
+{ /* Each field goes through the NVQMD_VAL/DEF/IDX wrappers; presumably one printed entry per field -- confirm in qmd.h. */
+ NVC3C0_QMDV02_02_VAL(qmd, OUTER_PUT, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, OUTER_OVERFLOW, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, OUTER_GET, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, OUTER_STICKY_OVERFLOW, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, INNER_GET, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, INNER_OVERFLOW, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, INNER_PUT, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, INNER_STICKY_OVERFLOW, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_GROUP_ID, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SM_GLOBAL_CACHING_ENABLE, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, RUN_CTA_IN_ONE_SM_PARTITION, FALSE, TRUE); /* DEF calls: the trailing arguments are presumably the field's enumerant names -- confirm in qmd.h */
+ NVC3C0_QMDV02_02_DEF(qmd, IS_QUEUE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, REQUIRE_SCHEDULING_PCAS, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_SCHEDULE_ENABLE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_TYPE, QUEUE, GRID);
+ NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_FIELD_COPY, FALSE, TRUE);
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_B, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_C, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_OFFSET, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_LOWER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_UPPER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_D, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ENTRY_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_ID, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_DELTA_MINUS_ONE, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR);
+ NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_INCR_ENABLE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR);
+ NVC3C0_QMDV02_02_DEF(qmd, SEQUENTIALLY_RUN_CTAS, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_DECR_ENABLE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK);
+ NVC3C0_QMDV02_02_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX);
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED13A, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED14A, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, DEPENDENT_QMD_POINTER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QUEUE_ENTRIES_PER_CTA_MINUS_ONE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, COALESCE_WAITING_PERIOD, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_VERSION, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_MAJOR_VERSION, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_H, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x");
+ for (int i = 0; i < 8; i++) /* one CONSTANT_BUFFER_VALID entry per index 0..7 */
+ NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE);
+ NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT_V, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, FREE_CTA_SLOTS_EMPTY_SM, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_LOWER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_UPPER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_J, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD,
+ RED_MIN,
+ RED_MAX,
+ RED_INC,
+ RED_DEC,
+ RED_AND,
+ RED_OR,
+ RED_XOR);
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_K, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
+ NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_PAYLOAD, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_L, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD,
+ RED_MIN,
+ RED_MAX,
+ RED_INC,
+ RED_DEC,
+ RED_AND,
+ RED_OR,
+ RED_XOR);
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_M, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
+ NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_PAYLOAD, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_N, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, BARRIER_COUNT, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SASS_VERSION, "0x%x");
+ for (int i = 0; i < 8; i++) { /* the five per-index CONSTANT_BUFFER_* fields, indices 0..7 */
+ NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x");
+ NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE);
+ NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, i, "0x%x");
+ }
+ NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_LOWER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_UPPER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_S, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_GET, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_REQUIRE_SCHEDULING_PCAS, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_PUT, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SCG_TYPE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_Q, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID, FALSE, TRUE);
+ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SKED_NEXT_QMD_POINTER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_G, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_H, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_I, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_J, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_K, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_L, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_M, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_N, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_UPPER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_LOWER, "0x%x");
+}
diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
index 5c43518afcb..2dbe7be0211 100644
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -104,6 +104,7 @@ nouveau_drm_screen_create(int fd)
case 0x110:
case 0x120:
case 0x130:
+ case 0x140:
init = nvc0_screen_create;
break;
default: