summaryrefslogtreecommitdiff
path: root/src/freedreno
diff options
context:
space:
mode:
authorConnor Abbott <cwabbott0@gmail.com>2020-10-27 16:12:27 +0100
committerConnor Abbott <cwabbott0@gmail.com>2020-11-19 17:55:03 +0100
commit92fe6fa0ccff47953ce3c2cbc550323d6ccac1b2 (patch)
tree31d885dc028bccbe3f6e0ddb87e76a222cb57be5 /src/freedreno
parent3d5bed03e13859b6aa066f3b7dcb9c699726c9d4 (diff)
freedreno/a6xx: Document private memory registers
They seem to be broadly similar to the a3xx ones, albeit with some things shuffled around and with different units, and the extra layout mode bits. We also document the FIRST_EXEC_OFFSET registers, so that we can start properly setting them all to 0 in freedreno and turnip in later commits. I discovered the compute one when playing with function support in the blob CL driver, and added the other registers via analogy (the blob Vulkan driver sets FIRST_EXEC_OFFSET and the shader VA together in one packet for all stages, so it seems to really be in the same place for all stages). Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7386>
Diffstat (limited to 'src/freedreno')
-rw-r--r--src/freedreno/.gitlab-ci/reference/crash.log120
-rw-r--r--src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log12
-rw-r--r--src/freedreno/.gitlab-ci/reference/fd-clouds.log30
-rw-r--r--src/freedreno/registers/adreno/a6xx.xml108
-rw-r--r--src/freedreno/vulkan/tu_cmd_buffer.c4
5 files changed, 184 insertions, 90 deletions
diff --git a/src/freedreno/.gitlab-ci/reference/crash.log b/src/freedreno/.gitlab-ci/reference/crash.log
index 502fa640f42..76ee876e107 100644
--- a/src/freedreno/.gitlab-ci/reference/crash.log
+++ b/src/freedreno/.gitlab-ci/reference/crash.log
@@ -7962,26 +7962,26 @@ clusters:
00000000 SP_VS_VPC_DST[0x5].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
00000000 SP_VS_VPC_DST[0x6].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
00000000 SP_VS_VPC_DST[0x7].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
- 00000000 SP_UNKNOWN_A81B: 0
+ 00000000 SP_VS_OBJ_FIRST_EXEC_OFFSET: 0
8e5d7d37 SP_VS_OBJ_START_LO: 0x8e5d7d37
0001fcd5 SP_VS_OBJ_START_HI: 0x1fcd5
- 00000000 0xa81e: 00000000
- 00000000 0xa81f: 00000000
- 00000000 0xa820: 00000000
- 00000000 0xa821: 00000000
+ 00000000 SP_VS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+ 00000000 SP_VS_PVT_MEM_ADDR: 0
+ 00000000 SP_VS_PVT_MEM_ADDR+0x1: 0
+ 00000000 SP_VS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
00000080 SP_VS_TEX_COUNT: 128
00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
00000000 SP_VS_INSTRLEN: 0
00000000 SP_HS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS }
00000000 SP_HS_UNKNOWN_A831: 0
00000000 0xa832: 00000000
- 00000000 SP_HS_UNKNOWN_A833: 0
+ 00000000 SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
780a8ca5 SP_HS_OBJ_START_LO: 0x780a8ca5
0001aad2 SP_HS_OBJ_START_HI: 0x1aad2
- 00000000 0xa836: 00000000
- 00000000 0xa837: 00000000
- 00000000 0xa838: 00000000
- 00000000 0xa839: 00000000
+ 00000000 SP_HS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+ 00000000 SP_HS_PVT_MEM_ADDR: 0
+ 00000000 SP_HS_PVT_MEM_ADDR+0x1: 0
+ 00000000 SP_HS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
00000080 SP_HS_TEX_COUNT: 128
00000000 SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
00000000 SP_HS_INSTRLEN: 0
@@ -8012,13 +8012,13 @@ clusters:
00000000 SP_DS_VPC_DST[0x5].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
00000000 SP_DS_VPC_DST[0x6].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
00000000 SP_DS_VPC_DST[0x7].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
- 00000000 SP_DS_UNKNOWN_A85B: 0
+ 00000000 SP_DS_OBJ_FIRST_EXEC_OFFSET: 0
7abf500d SP_DS_OBJ_START_LO: 0x7abf500d
00017e52 SP_DS_OBJ_START_HI: 0x17e52
- 00000000 0xa85e: 00000000
- 00000000 0xa85f: 00000000
- 00000000 0xa860: 00000000
- 00000000 0xa861: 00000000
+ 00000000 SP_DS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+ 00000000 SP_DS_PVT_MEM_ADDR: 0
+ 00000000 SP_DS_PVT_MEM_ADDR+0x1: 0
+ 00000000 SP_DS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
00000080 SP_DS_TEX_COUNT: 128
00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
00000000 SP_DS_INSTRLEN: 0
@@ -8050,13 +8050,13 @@ clusters:
00000000 SP_GS_VPC_DST[0x5].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
00000000 SP_GS_VPC_DST[0x6].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
00000000 SP_GS_VPC_DST[0x7].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
- 00000000 0xa88c: 00000000
+ 00000000 SP_GS_OBJ_FIRST_EXEC_OFFSET: 0
14e2046b SP_GS_OBJ_START_LO: 0x14e2046b
00004c8f SP_GS_OBJ_START_HI: 0x4c8f
- 00000000 0xa88f: 00000000
- 00000000 0xa890: 00000000
- 00000000 0xa891: 00000000
- 00000000 0xa892: 00000000
+ 00000000 SP_GS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+ 00000000 SP_GS_PVT_MEM_ADDR: 0
+ 00000000 SP_GS_PVT_MEM_ADDR+0x1: 0
+ 00000000 SP_GS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
00000080 SP_GS_TEX_COUNT: 128
00000100 SP_GS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
00000000 SP_GS_INSTRLEN: 0
@@ -8108,26 +8108,26 @@ clusters:
00000000 SP_VS_VPC_DST[0x5].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
00000000 SP_VS_VPC_DST[0x6].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
00000000 SP_VS_VPC_DST[0x7].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
- 00000000 SP_UNKNOWN_A81B: 0
+ 00000000 SP_VS_OBJ_FIRST_EXEC_OFFSET: 0
8e5d7d37 SP_VS_OBJ_START_LO: 0x8e5d7d37
0001fcd5 SP_VS_OBJ_START_HI: 0x1fcd5
- 00000000 0xa81e: 00000000
- 00000000 0xa81f: 00000000
- 00000000 0xa820: 00000000
- 00000000 0xa821: 00000000
+ 00000000 SP_VS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+ 00000000 SP_VS_PVT_MEM_ADDR: 0
+ 00000000 SP_VS_PVT_MEM_ADDR+0x1: 0
+ 00000000 SP_VS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
00000080 SP_VS_TEX_COUNT: 128
00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
00000000 SP_VS_INSTRLEN: 0
00000000 SP_HS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS }
00000000 SP_HS_UNKNOWN_A831: 0
00000000 0xa832: 00000000
- 00000000 SP_HS_UNKNOWN_A833: 0
+ 00000000 SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
780a8ca5 SP_HS_OBJ_START_LO: 0x780a8ca5
0001aad2 SP_HS_OBJ_START_HI: 0x1aad2
- 00000000 0xa836: 00000000
- 00000000 0xa837: 00000000
- 00000000 0xa838: 00000000
- 00000000 0xa839: 00000000
+ 00000000 SP_HS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+ 00000000 SP_HS_PVT_MEM_ADDR: 0
+ 00000000 SP_HS_PVT_MEM_ADDR+0x1: 0
+ 00000000 SP_HS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
00000080 SP_HS_TEX_COUNT: 128
00000000 SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
00000000 SP_HS_INSTRLEN: 0
@@ -8158,13 +8158,13 @@ clusters:
00000000 SP_DS_VPC_DST[0x5].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
00000000 SP_DS_VPC_DST[0x6].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
00000000 SP_DS_VPC_DST[0x7].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
- 00000000 SP_DS_UNKNOWN_A85B: 0
+ 00000000 SP_DS_OBJ_FIRST_EXEC_OFFSET: 0
7abf500d SP_DS_OBJ_START_LO: 0x7abf500d
00017e52 SP_DS_OBJ_START_HI: 0x17e52
- 00000000 0xa85e: 00000000
- 00000000 0xa85f: 00000000
- 00000000 0xa860: 00000000
- 00000000 0xa861: 00000000
+ 00000000 SP_DS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+ 00000000 SP_DS_PVT_MEM_ADDR: 0
+ 00000000 SP_DS_PVT_MEM_ADDR+0x1: 0
+ 00000000 SP_DS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
00000080 SP_DS_TEX_COUNT: 128
00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
00000000 SP_DS_INSTRLEN: 0
@@ -8196,13 +8196,13 @@ clusters:
00000000 SP_GS_VPC_DST[0x5].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
00000000 SP_GS_VPC_DST[0x6].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
00000000 SP_GS_VPC_DST[0x7].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
- 00000000 0xa88c: 00000000
+ 00000000 SP_GS_OBJ_FIRST_EXEC_OFFSET: 0
14e2046b SP_GS_OBJ_START_LO: 0x14e2046b
00004c8f SP_GS_OBJ_START_HI: 0x4c8f
- 00000000 0xa88f: 00000000
- 00000000 0xa890: 00000000
- 00000000 0xa891: 00000000
- 00000000 0xa892: 00000000
+ 00000000 SP_GS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+ 00000000 SP_GS_PVT_MEM_ADDR: 0
+ 00000000 SP_GS_PVT_MEM_ADDR+0x1: 0
+ 00000000 SP_GS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
00000080 SP_GS_TEX_COUNT: 128
00000100 SP_GS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
00000000 SP_GS_INSTRLEN: 0
@@ -8396,13 +8396,13 @@ clusters:
- context: 0
05100000 SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | PIXLODENABLE | 0x1000000 }
00000000 SP_FS_BRANCH_COND: 0
- 00000000 SP_UNKNOWN_A982: 0
+ 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
4bdb43d8 SP_FS_OBJ_START_LO: 0x4bdb43d8
0001af86 SP_FS_OBJ_START_HI: 0x1af86
- 00000000 0xa985: 00000000
- 00000000 0xa986: 00000000
- 00000000 0xa987: 00000000
- 00000000 0xa988: 00000000
+ 00000000 SP_FS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+ 00000000 SP_FS_PVT_MEM_ADDR: 0
+ 00000000 SP_FS_PVT_MEM_ADDR+0x1: 0
+ 00000000 SP_FS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
00000100 SP_BLEND_CNTL: { UNK8 }
00000000 SP_SRGB_CNTL: { 0 }
00000000 SP_FS_RENDER_COMPONENTS: { RT0 = 0 | RT1 = 0 | RT2 = 0 | RT3 = 0 | RT4 = 0 | RT5 = 0 | RT6 = 0 | RT7 = 0 }
@@ -8438,13 +8438,13 @@ clusters:
00421800 SP_CS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 | THREADSIZE = TWO_QUADS | VARYING }
0000001f SP_CS_UNKNOWN_A9B1: 31
00000000 0xa9b2: 00000000
- 00000000 SP_CS_UNKNOWN_A9B3: 0
+ 00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
8c415420 SP_CS_OBJ_START_LO: 0x8c415420
00000000 SP_CS_OBJ_START_HI: 0
- 00000000 0xa9b6: 00000000
- 00000000 0xa9b7: 00000000
- 00000000 0xa9b8: 00000000
- 00000000 0xa9b9: 00000000
+ 00000000 SP_CS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+ 00000000 SP_CS_PVT_MEM_ADDR: 0
+ 00000000 SP_CS_PVT_MEM_ADDR+0x1: 0
+ 00000000 SP_CS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
00000080 SP_CS_TEX_COUNT: 128
00200100 SP_CS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 16 | NIBO = 0 }
00000004 SP_CS_INSTRLEN: 4
@@ -8478,13 +8478,13 @@ clusters:
- context: 1
05100000 SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | PIXLODENABLE | 0x1000000 }
00000000 SP_FS_BRANCH_COND: 0
- 00000000 SP_UNKNOWN_A982: 0
+ 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
4bdb43d8 SP_FS_OBJ_START_LO: 0x4bdb43d8
0001af86 SP_FS_OBJ_START_HI: 0x1af86
- 00000000 0xa985: 00000000
- 00000000 0xa986: 00000000
- 00000000 0xa987: 00000000
- 00000000 0xa988: 00000000
+ 00000000 SP_FS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+ 00000000 SP_FS_PVT_MEM_ADDR: 0
+ 00000000 SP_FS_PVT_MEM_ADDR+0x1: 0
+ 00000000 SP_FS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
00000100 SP_BLEND_CNTL: { UNK8 }
00000000 SP_SRGB_CNTL: { 0 }
00000000 SP_FS_RENDER_COMPONENTS: { RT0 = 0 | RT1 = 0 | RT2 = 0 | RT3 = 0 | RT4 = 0 | RT5 = 0 | RT6 = 0 | RT7 = 0 }
@@ -8520,13 +8520,13 @@ clusters:
00421800 SP_CS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 48 | BRANCHSTACK = 8 | THREADSIZE = TWO_QUADS | VARYING }
0000001f SP_CS_UNKNOWN_A9B1: 31
00000000 0xa9b2: 00000000
- 00000000 SP_CS_UNKNOWN_A9B3: 0
+ 00000000 SP_CS_OBJ_FIRST_EXEC_OFFSET: 0
8c415420 SP_CS_OBJ_START_LO: 0x8c415420
00000000 SP_CS_OBJ_START_HI: 0
- 00000000 0xa9b6: 00000000
- 00000000 0xa9b7: 00000000
- 00000000 0xa9b8: 00000000
- 00000000 0xa9b9: 00000000
+ 00000000 SP_CS_PVT_MEM_PARAM: { MEMSIZEPERITEM = 0 | HWSTACKSIZEPERTHREAD = 0 }
+ 00000000 SP_CS_PVT_MEM_ADDR: 0
+ 00000000 SP_CS_PVT_MEM_ADDR+0x1: 0
+ 00000000 SP_CS_PVT_MEM_SIZE: { TOTALPVTMEMSIZE = 0 }
00000080 SP_CS_TEX_COUNT: 128
00200100 SP_CS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 16 | NIBO = 0 }
00000004 SP_CS_INSTRLEN: 4
diff --git a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log
index 7bbaa5de1fd..3f56d8a65de 100644
--- a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log
+++ b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log
@@ -69,8 +69,8 @@ t4 write UCHE_CLIENT_PF (0e19)
t4 write RB_UNKNOWN_8E01 (8e01)
RB_UNKNOWN_8E01: 0
00000000010580a4: 0000: 408e0101 00000000
-t4 write SP_UNKNOWN_A982 (a982)
- SP_UNKNOWN_A982: 0
+t4 write SP_FS_OBJ_FIRST_EXEC_OFFSET (a982)
+ SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
00000000010580ac: 0000: 48a98201 00000000
t4 write SP_UNKNOWN_A9A8 (a9a8)
SP_UNKNOWN_A9A8: 0
@@ -147,8 +147,8 @@ t4 write PC_PRIMITIVE_CNTL_6 (9b06)
t4 write PC_MULTIVIEW_CNTL (9b07)
PC_MULTIVIEW_CNTL: { VIEWS = 0 }
0000000001058174: 0000: 489b0701 00000000
-t4 write SP_UNKNOWN_A81B (a81b)
- SP_UNKNOWN_A81B: 0
+t4 write SP_VS_OBJ_FIRST_EXEC_OFFSET (a81b)
+ SP_VS_OBJ_FIRST_EXEC_OFFSET: 0
000000000105817c: 0000: 40a81b01 00000000
t4 write SP_UNKNOWN_B183 (b183)
SP_UNKNOWN_B183: 0
@@ -340,10 +340,10 @@ t7 opcode: CP_BLIT (2c) (2 dwords)
+ 00000000 VFD_MODE_CNTL: { 0 }
+ 00000000 VFD_MULTIVIEW_CNTL: { VIEWS = 0 }
!+ 00000001 VFD_ADD_OFFSET: { VERTEX }
- + 00000000 SP_UNKNOWN_A81B: 0
+ + 00000000 SP_VS_OBJ_FIRST_EXEC_OFFSET: 0
+ 00000000 SP_HS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS }
+ 00000000 SP_GS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 0 | BRANCHSTACK = 0 | THREADSIZE = TWO_QUADS }
- + 00000000 SP_UNKNOWN_A982: 0
+ + 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
+ 00000000 SP_UNKNOWN_A9A8: 0
!+ 00000005 SP_MODE_CONTROL: { CONSTANT_DEMOTION_ENABLE | 0x4 }
+ 00000000 SP_IBO_COUNT: 0
diff --git a/src/freedreno/.gitlab-ci/reference/fd-clouds.log b/src/freedreno/.gitlab-ci/reference/fd-clouds.log
index e2f0c7c9634..24fdc6603bf 100644
--- a/src/freedreno/.gitlab-ci/reference/fd-clouds.log
+++ b/src/freedreno/.gitlab-ci/reference/fd-clouds.log
@@ -135,8 +135,8 @@ t4 write PC_RASTER_CNTL (9980)
t4 write PC_MULTIVIEW_CNTL (9b07)
PC_MULTIVIEW_CNTL: { VIEWS = 0 }
0000000001d91154: 0000: 489b0701 00000000
-t4 write SP_UNKNOWN_A81B (a81b)
- SP_UNKNOWN_A81B: 0
+t4 write SP_VS_OBJ_FIRST_EXEC_OFFSET (a81b)
+ SP_VS_OBJ_FIRST_EXEC_OFFSET: 0
0000000001d9115c: 0000: 40a81b01 00000000
t4 write SP_UNKNOWN_B183 (b183)
SP_UNKNOWN_B183: 0
@@ -603,8 +603,8 @@ t4 write SP_IBO_COUNT (ab20)
0000000001121120: 0120: 000000fc 000000fc 40930101 00ff0004 409b0601 00000000 40a87101 00000000
0000000001121140: 0140: 48910101 00ffff00 48910701 00000000 40a00186 fcfcfcfc 0000fcfc fcfcfcfc
0000000001121160: 0160: 000000fc 0000fcfc 00000000 40887001 00000000 48809401 00000000
-t4 write SP_HS_UNKNOWN_A833 (a833)
- SP_HS_UNKNOWN_A833: 0
+t4 write SP_HS_OBJ_FIRST_EXEC_OFFSET (a833)
+ SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
0000000001121000: 0000: 40a83301 00000000
t4 write SP_FS_PREFETCH_CNTL (a99e)
SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | 0x7000 }
@@ -709,8 +709,8 @@ t4 write HLSQ_UNKNOWN_B980 (b980)
t4 write SP_FS_CTRL_REG0 (a980)
SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | MERGEDREGS | 0x1000000 }
00000000011210c0: 0000: 40a98001 81100080
-t4 write SP_UNKNOWN_A982 (a982)
- SP_UNKNOWN_A982: 0
+t4 write SP_FS_OBJ_FIRST_EXEC_OFFSET (a982)
+ SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
00000000011210c8: 0000: 48a98201 00000000
t4 write VPC_VS_LAYER_CNTL (9104)
VPC_VS_LAYER_CNTL: { LAYERLOC = 255 | VIEWLOC = 255 }
@@ -1094,7 +1094,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
!+ 00000001 SP_VS_PRIMITIVE_CNTL: { OUT = 1 }
!+ 00000f00 SP_VS_OUT[0].REG: { A_REGID = r0.x | A_COMPMASK = 0xf | B_REGID = r0.x | B_COMPMASK = 0 }
+ 00000000 SP_VS_VPC_DST[0].REG: { OUTLOC0 = 0 | OUTLOC1 = 0 | OUTLOC2 = 0 | OUTLOC3 = 0 }
- + 00000000 SP_UNKNOWN_A81B: 0
+ + 00000000 SP_VS_OBJ_FIRST_EXEC_OFFSET: 0
!+ 01011000 SP_VS_OBJ_START_LO: 0x1011000 base=1011000, offset=0, size=128
+ 00000000 SP_VS_OBJ_START_HI: 0 base=1011000, offset=0, size=128
0000000001011000: 0000: 00000000 03000000 00000000 00000000 00000000 00000000 00000000 00000000
@@ -1119,13 +1119,13 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
!+ 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
!+ 00000001 SP_VS_INSTRLEN: 1
+ 00000000 SP_HS_UNKNOWN_A831: 0
- + 00000000 SP_HS_UNKNOWN_A833: 0
+ + 00000000 SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
+ 00000000 SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
+ 00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
+ 00000000 SP_GS_PRIM_SIZE: 0
+ 00000000 SP_GS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
!+ 81100080 SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 1 | BRANCHSTACK = 0 | THREADSIZE = FOUR_QUADS | MERGEDREGS | 0x1000000 }
- + 00000000 SP_UNKNOWN_A982: 0
+ + 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
+ 00000000 SP_SRGB_CNTL: { 0 }
!+ 0000000f SP_FS_RENDER_COMPONENTS: { RT0 = 0xf | RT1 = 0 | RT2 = 0 | RT3 = 0 | RT4 = 0 | RT5 = 0 | RT6 = 0 | RT7 = 0 }
!+ fcfcfc00 SP_FS_OUTPUT_CNTL0: { DEPTH_REGID = r63.x | SAMPMASK_REGID = r63.x | STENCILREF_REGID = r63.x }
@@ -1918,8 +1918,8 @@ t4 write SP_IBO_COUNT (ab20)
00000000011202a0: 02a0: 3dd70a3e 3d3851ec 40d9999a 3d4ac083 3ba3d70a 3de147ae 358637bd 33d6bf95
00000000011202c0: 02c0: 3f0ccccd 41800000 45070000 44b40000 3df5c28f 3f333333 3f266666 3f7851ec
00000000011202e0: 02e0: 3f19999a 3f666666 3f7d70a4 40100000 00000000 3db851ec 00000000 07ee25f4
-t4 write SP_HS_UNKNOWN_A833 (a833)
- SP_HS_UNKNOWN_A833: 0
+t4 write SP_HS_OBJ_FIRST_EXEC_OFFSET (a833)
+ SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
0000000001120000: 0000: 40a83301 00000000
t4 write SP_FS_PREFETCH_CNTL (a99e)
SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | 0x7000 }
@@ -2024,8 +2024,8 @@ t4 write HLSQ_UNKNOWN_B980 (b980)
t4 write SP_FS_CTRL_REG0 (a980)
SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 | THREADSIZE = FOUR_QUADS | VARYING | MERGEDREGS | 0x1000000 }
00000000011200c0: 0000: 40a98001 81508980
-t4 write SP_UNKNOWN_A982 (a982)
- SP_UNKNOWN_A982: 0
+t4 write SP_FS_OBJ_FIRST_EXEC_OFFSET (a982)
+ SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
00000000011200c8: 0000: 48a98201 00000000
t4 write VPC_VS_LAYER_CNTL (9104)
VPC_VS_LAYER_CNTL: { LAYERLOC = 255 | VIEWLOC = 255 }
@@ -5344,13 +5344,13 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
+ 00000100 SP_VS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
+ 00000001 SP_VS_INSTRLEN: 1
+ 00000000 SP_HS_UNKNOWN_A831: 0
- + 00000000 SP_HS_UNKNOWN_A833: 0
+ + 00000000 SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
+ 00000000 SP_HS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
+ 00000000 SP_DS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
+ 00000000 SP_GS_PRIM_SIZE: 0
+ 00000000 SP_GS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
!+ 81508980 SP_FS_CTRL_REG0: { HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 19 | BRANCHSTACK = 2 | THREADSIZE = FOUR_QUADS | VARYING | MERGEDREGS | 0x1000000 }
- + 00000000 SP_UNKNOWN_A982: 0
+ + 00000000 SP_FS_OBJ_FIRST_EXEC_OFFSET: 0
!+ 01013000 SP_FS_OBJ_START_LO: 0x1013000 base=1013000, offset=0, size=11264
+ 00000000 SP_FS_OBJ_START_HI: 0 base=1013000, offset=0, size=11264
0000000001013000: 0000: 40400000 204cc000 00000000 204cc006 3e99999a 204cc004 20080014 42700008
diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml
index b15d66c59d3..082fd720398 100644
--- a/src/freedreno/registers/adreno/a6xx.xml
+++ b/src/freedreno/registers/adreno/a6xx.xml
@@ -3148,21 +3148,90 @@ to upconvert to 32b float internally?
</reg32>
</array>
- <reg32 offset="0xa81b" name="SP_UNKNOWN_A81B"/>
+ <bitset name="a6xx_sp_xs_pvt_mem_param" inline="yes">
+ <bitfield name="MEMSIZEPERITEM" low="0" high="7" shr="9">
+ <doc>The size of memory that ldp/stp can address.</doc>
+ </bitfield>
+ <bitfield name="HWSTACKSIZEPERTHREAD" low="24" high="31">
+ <doc>
+ Seems to be the same as a3xx. The maximum stack
+ size in units of 4 calls, so a call depth of 7
+ would result in a value of 2.
+ TODO: What's the actual size per call, i.e. the
+ size of the PC? a3xx docs say it's 16 bits
+ there, but the length register now takes 28 bits
+ so it's probably been bumped to 32 bits.
+ </doc>
+ </bitfield>
+ </bitset>
+
+ <bitset name="a6xx_sp_xs_pvt_mem_size" inline="yes">
+ <bitfield name="TOTALPVTMEMSIZE" low="0" high="17" shr="12"/>
+ <bitfield name="PERWAVEMEMLAYOUT" pos="31" type="boolean">
+ <doc>
+ There are four indices used to compute the
+ private memory location for an access:
+
+ - stp/ldp offset
+ - fiber id
+ - wavefront id (a swizzled version of what "getwid" returns)
+ - SP ID (the same as what "getspid" returns)
+
+ The stride for the SP ID is always set by
+ TOTALPVTMEMSIZE. In the per-wave layout, the
+ indices are used in this order:
+
+ - offset % 4 (offset within dword)
+ - fiber id
+ - offset / 4
+ - wavefront id
+ - SP ID
+
+ and the stride for the wavefront ID is
+ MEMSIZEPERITEM, multiplied by 128 (fibers per
+ wavefront). In the per-fiber layout, the indices
+ are used in this order:
+
+ - offset
+ - fiber id % 4
+ - wavefront id
+ - fiber id / 4
+ - SP ID
+
+ and the stride for the fiber id/wavefront id
+ combo is MEMSIZEPERITEM.
+
+ Note: Accesses of more than 1 dword do not work
+ with per-fiber layout. The blob will fall back
+ to per-wave instead.
+ </doc>
+ </bitfield>
+ </bitset>
+
+ <reg32 offset="0xa81b" name="SP_VS_OBJ_FIRST_EXEC_OFFSET" type="uint"/>
<reg32 offset="0xa81c" name="SP_VS_OBJ_START_LO"/>
<reg32 offset="0xa81d" name="SP_VS_OBJ_START_HI"/>
+ <reg32 offset="0xa81e" name="SP_VS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param"/>
+ <reg64 offset="0xa81f" name="SP_VS_PVT_MEM_ADDR" type="waddress" align="32"/>
+ <reg32 offset="0xa821" name="SP_VS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size"/>
<reg32 offset="0xa822" name="SP_VS_TEX_COUNT" type="uint"/>
<reg32 offset="0xa823" name="SP_VS_CONFIG" type="a6xx_sp_xs_config"/>
<reg32 offset="0xa824" name="SP_VS_INSTRLEN" type="uint"/>
+ <reg32 offset="0xa825" name="SP_VS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11"/>
<reg32 offset="0xa830" name="SP_HS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
<reg32 offset="0xa831" name="SP_HS_UNKNOWN_A831"/>
- <reg32 offset="0xa833" name="SP_HS_UNKNOWN_A833"/>
+
+ <reg32 offset="0xa833" name="SP_HS_OBJ_FIRST_EXEC_OFFSET" type="uint"/>
<reg32 offset="0xa834" name="SP_HS_OBJ_START_LO"/>
<reg32 offset="0xa835" name="SP_HS_OBJ_START_HI"/>
+ <reg32 offset="0xa836" name="SP_HS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param"/>
+ <reg64 offset="0xa837" name="SP_HS_PVT_MEM_ADDR" type="waddress" align="32"/>
+ <reg32 offset="0xa839" name="SP_HS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size"/>
<reg32 offset="0xa83a" name="SP_HS_TEX_COUNT" type="uint"/>
<reg32 offset="0xa83b" name="SP_HS_CONFIG" type="a6xx_sp_xs_config"/>
<reg32 offset="0xa83c" name="SP_HS_INSTRLEN" type="uint"/>
+ <reg32 offset="0xa83d" name="SP_HS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11"/>
<reg32 offset="0xa840" name="SP_DS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
<reg32 offset="0xa842" name="SP_DS_PRIMITIVE_CNTL">
@@ -3186,12 +3255,16 @@ to upconvert to 32b float internally?
</reg32>
</array>
- <reg32 offset="0xa85b" name="SP_DS_UNKNOWN_A85B"/>
+ <reg32 offset="0xa85b" name="SP_DS_OBJ_FIRST_EXEC_OFFSET" type="uint"/>
<reg32 offset="0xa85c" name="SP_DS_OBJ_START_LO"/>
<reg32 offset="0xa85d" name="SP_DS_OBJ_START_HI"/>
+ <reg32 offset="0xa85e" name="SP_DS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param"/>
+ <reg64 offset="0xa85f" name="SP_DS_PVT_MEM_ADDR" type="waddress" align="32"/>
+ <reg32 offset="0xa861" name="SP_DS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size"/>
<reg32 offset="0xa862" name="SP_DS_TEX_COUNT" type="uint"/>
<reg32 offset="0xa863" name="SP_DS_CONFIG" type="a6xx_sp_xs_config"/>
<reg32 offset="0xa864" name="SP_DS_INSTRLEN" type="uint"/>
+ <reg32 offset="0xa865" name="SP_DS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11"/>
<reg32 offset="0xa870" name="SP_GS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
<reg32 offset="0xa871" name="SP_GS_PRIM_SIZE">
@@ -3228,11 +3301,16 @@ to upconvert to 32b float internally?
</reg32>
</array>
+ <reg32 offset="0xa88c" name="SP_GS_OBJ_FIRST_EXEC_OFFSET" type="uint"/>
<reg32 offset="0xa88d" name="SP_GS_OBJ_START_LO"/>
<reg32 offset="0xa88e" name="SP_GS_OBJ_START_HI"/>
+ <reg32 offset="0xa88f" name="SP_GS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param"/>
+ <reg64 offset="0xa890" name="SP_GS_PVT_MEM_ADDR" type="waddress" align="32"/>
+ <reg32 offset="0xa892" name="SP_GS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size"/>
<reg32 offset="0xa893" name="SP_GS_TEX_COUNT" type="uint"/>
<reg32 offset="0xa894" name="SP_GS_CONFIG" type="a6xx_sp_xs_config"/>
<reg32 offset="0xa895" name="SP_GS_INSTRLEN" type="uint"/>
+ <reg32 offset="0xa896" name="SP_GS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11"/>
<reg32 offset="0xa8a0" name="SP_VS_TEX_SAMP_LO"/>
<reg32 offset="0xa8a1" name="SP_VS_TEX_SAMP_HI"/>
@@ -3258,9 +3336,13 @@ to upconvert to 32b float internally?
bit N corresponds to brac.N
-->
</reg32>
- <reg32 offset="0xa982" name="SP_UNKNOWN_A982"/>
+
+ <reg32 offset="0xa982" name="SP_FS_OBJ_FIRST_EXEC_OFFSET" type="uint"/>
<reg32 offset="0xa983" name="SP_FS_OBJ_START_LO"/>
<reg32 offset="0xa984" name="SP_FS_OBJ_START_HI"/>
+ <reg32 offset="0xa985" name="SP_FS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param"/>
+ <reg64 offset="0xa986" name="SP_FS_PVT_MEM_ADDR" type="waddress" align="32"/>
+ <reg32 offset="0xa988" name="SP_FS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size"/>
<reg32 offset="0xa989" name="SP_BLEND_CNTL">
<bitfield name="ENABLED" pos="0" type="boolean"/>
@@ -3346,6 +3428,8 @@ to upconvert to 32b float internally?
<!-- always 0x0 ? -->
<reg32 offset="0xa9a8" name="SP_UNKNOWN_A9A8"/>
+ <reg32 offset="0xa9a9" name="SP_FS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11"/>
+
<!-- set for compute shaders, always 0x41 -->
<reg32 offset="0xa9b1" name="SP_CS_UNKNOWN_A9B1" type="uint">
<doc>
@@ -3358,9 +3442,6 @@ to upconvert to 32b float internally?
<bitfield name="SHARED_SIZE_2K" pos="0" type="uint"/>
</reg32>
- <!-- set for compute shaders, always 0x0 -->
- <reg32 offset="0xa9b3" name="SP_CS_UNKNOWN_A9B3" type="uint"/>
-
<reg32 offset="0xa9ba" name="SP_CS_TEX_COUNT" type="uint"/>
<reg32 offset="0xa9e0" name="SP_FS_TEX_SAMP_LO"/>
@@ -3385,10 +3466,23 @@ to upconvert to 32b float internally?
</array>
<reg32 offset="0xa9b0" name="SP_CS_CTRL_REG0" type="a6xx_sp_xs_ctrl_reg0"/>
+ <reg32 offset="0xa9b3" name="SP_CS_OBJ_FIRST_EXEC_OFFSET" type="uint"/>
<reg32 offset="0xa9b4" name="SP_CS_OBJ_START_LO"/>
<reg32 offset="0xa9b5" name="SP_CS_OBJ_START_HI"/>
+ <reg32 offset="0xa9b6" name="SP_CS_PVT_MEM_PARAM" type="a6xx_sp_xs_pvt_mem_param"/>
+ <reg64 offset="0xa9b7" name="SP_CS_PVT_MEM_ADDR" align="32"/>
+ <reg32 offset="0xa9b9" name="SP_CS_PVT_MEM_SIZE" type="a6xx_sp_xs_pvt_mem_size"/>
<reg32 offset="0xa9bb" name="SP_CS_CONFIG" type="a6xx_sp_xs_config"/>
<reg32 offset="0xa9bc" name="SP_CS_INSTRLEN" type="uint"/>
+ <reg32 offset="0xa9bd" name="SP_CS_PVT_MEM_HW_STACK_OFFSET" low="0" high="18" shr="11">
+ <doc>
+ This seems to be the equivalent of HWSTACKOFFSET in
+ a3xx. The offset formula isn't affected by
+ HWSTACKSIZEPERTHREAD at all, so the HW return address
+ stack seems to be after all the normal per-SP private
+ memory.
+ </doc>
+ </reg32>
<!--
IBO state for compute shader:
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 102932e5f66..71a2b321239 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -756,7 +756,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_CLIENT_PF, 4);
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E01, 0x0);
- tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A982, 0);
+ tu_cs_emit_write_reg(cs, REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A9A8, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_SP_MODE_CONTROL,
A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4);
@@ -782,7 +782,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit_regs(cs, A6XX_VPC_SO_DISABLE(true));
- tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A81B, 0);
+ tu_cs_emit_write_reg(cs, REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0);