summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jan Zielinski <jan.zielinski@intel.com> 2021-06-09 13:19:44 +0200
committer Marge Bot <emma+marge@anholt.net> 2021-12-06 23:37:50 +0000
commit 855793c6c6bd372ea96681ecbd3f318ad71da223 (patch)
tree cbd8efc0c9df58d3bdc2ba774cf46dcdcad21162
parent d22d328859e4a67e6ff738fbd22eaf1d5a09376a (diff)
gallium/swr: Remove driver source
The OpenSWR will be maintained on a classic/LTS branch.

Reviewed-by: Dylan Baker <dylan@pnwbakers.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11264>
-rw-r--r--src/gallium/drivers/swr/.clang-format64
-rw-r--r--src/gallium/drivers/swr/meson.build411
-rw-r--r--src/gallium/drivers/swr/rasterizer/.dir-locals.el8
-rw-r--r--src/gallium/drivers/swr/rasterizer/_clang-format114
-rw-r--r--src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp708
-rw-r--r--src/gallium/drivers/swr/rasterizer/archrast/archrast.h49
-rw-r--r--src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h88
-rw-r--r--src/gallium/drivers/swr/rasterizer/archrast/events.proto427
-rw-r--r--src/gallium/drivers/swr/rasterizer/archrast/events_private.proto212
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py327
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/gen_backends.py164
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/gen_common.py291
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.py80
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py362
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py360
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py383
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/meson.build77
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp55
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp168
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp61
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp174
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/templates/gen_backend.cpp42
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp84
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/templates/gen_header_init.hpp46
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp143
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h154
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp109
-rw-r--r--src/gallium/drivers/swr/rasterizer/codegen/templates/gen_rasterizer.cpp44
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/formats.cpp9298
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/formats.h268
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/intrin.h120
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/isa.hpp231
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/os.cpp314
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/os.h365
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.cpp192
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h227
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets_shared.h169
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simd16intrin.h168
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdintrin.h322
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib.hpp234
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl593
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx2.inl66
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl368
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl196
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl34
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl826
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl255
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl349
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl129
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl34
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl699
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl186
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl132
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks.inl27
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl27
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl27
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl852
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu_masks.inl27
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_interface.hpp332
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp457
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/swr_assert.cpp299
-rw-r--r--src/gallium/drivers/swr/rasterizer/common/swr_assert.h242
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.cpp1802
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/api.h772
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/arena.h490
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.cpp420
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend.h70
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp308
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend_impl.h1300
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp454
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp428
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/backends/meson.build57
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/binner.cpp1976
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/binner.h254
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/blend.h348
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/clip.cpp336
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/clip.h1361
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/conservativeRast.h229
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/context.h608
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/depthstencil.h335
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/fifo.hpp138
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/format_conversion.h262
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/format_traits.h4046
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/format_types.h1629
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/format_utils.h939
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.cpp2385
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/frontend.h448
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/knobs.h175
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/knobs_init.h108
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/multisample.h459
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/pa.h1676
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp3141
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp473
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/rasterizer.h237
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h1542
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp94
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h185
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/ringbuffer.h95
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/state.h1240
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/state_funcs.h67
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/tessellator.cpp2689
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/tessellator.h202
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/tessellator.hpp471
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.cpp1423
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/threads.h82
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp454
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/tilemgr.h354
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/tileset.h102
-rw-r--r--src/gallium/drivers/swr/rasterizer/core/utils.h392
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp853
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/JitManager.h212
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp924
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/blend_jit.h129
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder.cpp219
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder.h181
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp396
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h136
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_math.h34
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp767
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h170
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp1125
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h212
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp2332
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h150
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp962
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h38
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/jit_api.h113
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp183
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/meson.build63
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/shader_lib/DebugOutput.cpp49
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/shader_lib/Scatter.cpp49
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp379
-rw-r--r--src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.h101
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/ClearTile.cpp305
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/Convert.h730
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp50
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/InitMemory.h83
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/LoadTile.cpp157
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/LoadTile.h354
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/LoadTile_Linear.cpp39
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/LoadTile_TileX.cpp37
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/LoadTile_TileY.cpp39
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp129
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/StoreTile.h2051
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/StoreTile_Linear.cpp35
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/StoreTile_Linear2.cpp33
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileW.cpp35
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileX.cpp33
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileX2.cpp33
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileY.cpp34
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileY2.cpp33
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/SurfaceState.h66
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/TilingFunctions.h697
-rw-r--r--src/gallium/drivers/swr/rasterizer/memory/tilingtraits.h207
-rw-r--r--src/gallium/drivers/swr/swr_clear.cpp100
-rw-r--r--src/gallium/drivers/swr/swr_context.cpp595
-rw-r--r--src/gallium/drivers/swr/swr_context.h236
-rw-r--r--src/gallium/drivers/swr/swr_draw.cpp399
-rw-r--r--src/gallium/drivers/swr/swr_fence.cpp159
-rw-r--r--src/gallium/drivers/swr/swr_fence.h89
-rw-r--r--src/gallium/drivers/swr/swr_fence_work.cpp213
-rw-r--r--src/gallium/drivers/swr/swr_fence_work.h56
-rw-r--r--src/gallium/drivers/swr/swr_loader.cpp160
-rw-r--r--src/gallium/drivers/swr/swr_memory.h61
-rw-r--r--src/gallium/drivers/swr/swr_public.h57
-rw-r--r--src/gallium/drivers/swr/swr_query.cpp272
-rw-r--r--src/gallium/drivers/swr/swr_query.h48
-rw-r--r--src/gallium/drivers/swr/swr_resource.h145
-rw-r--r--src/gallium/drivers/swr/swr_scratch.cpp106
-rw-r--r--src/gallium/drivers/swr/swr_scratch.h66
-rw-r--r--src/gallium/drivers/swr/swr_screen.cpp1155
-rw-r--r--src/gallium/drivers/swr/swr_screen.h86
-rw-r--r--src/gallium/drivers/swr/swr_shader.cpp3040
-rw-r--r--src/gallium/drivers/swr/swr_shader.h175
-rw-r--r--src/gallium/drivers/swr/swr_state.cpp2243
-rw-r--r--src/gallium/drivers/swr/swr_state.h426
-rw-r--r--src/gallium/drivers/swr/swr_tex_sample.cpp376
-rw-r--r--src/gallium/drivers/swr/swr_tex_sample.h48
178 files changed, 0 insertions, 85594 deletions
diff --git a/src/gallium/drivers/swr/.clang-format b/src/gallium/drivers/swr/.clang-format
deleted file mode 100644
index 0ec65a5de88..00000000000
--- a/src/gallium/drivers/swr/.clang-format
+++ /dev/null
@@ -1,64 +0,0 @@
----
-Language: Cpp
-AccessModifierOffset: -3
-AlignAfterOpenBracket: true
-AlignEscapedNewlinesLeft: false
-AlignOperands: false
-AlignTrailingComments: false
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: false
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortIfStatementsOnASingleLine: false
-AllowShortLoopsOnASingleLine: false
-AllowShortFunctionsOnASingleLine: All
-AlwaysBreakAfterDefinitionReturnType: true
-AlwaysBreakTemplateDeclarations: false
-AlwaysBreakBeforeMultilineStrings: false
-BreakBeforeBinaryOperators: NonAssignment
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: true
-BinPackParameters: false
-BinPackArguments: false
-ColumnLimit: 78
-ConstructorInitializerAllOnOneLineOrOnePerLine: false
-ConstructorInitializerIndentWidth: 3
-DerivePointerAlignment: false
-ExperimentalAutoDetectBinPacking: false
-IndentCaseLabels: false
-IndentWrappedFunctionNames: false
-IndentFunctionDeclarationAfterType: false
-MaxEmptyLinesToKeep: 2
-KeepEmptyLinesAtTheStartOfBlocks: true
-NamespaceIndentation: Inner
-ObjCBlockIndentWidth: 3
-ObjCSpaceAfterProperty: true
-ObjCSpaceBeforeProtocolList: true
-PenaltyBreakBeforeFirstCallParameter: 19
-PenaltyBreakComment: 300
-PenaltyBreakString: 1000
-PenaltyBreakFirstLessLess: 120
-PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 0
-PointerAlignment: Right
-SpacesBeforeTrailingComments: 1
-Cpp11BracedListStyle: true
-Standard: Cpp11
-IndentWidth: 3
-TabWidth: 8
-UseTab: Never
-BreakBeforeBraces: Linux
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-SpacesInAngles: false
-SpaceInEmptyParentheses: false
-SpacesInCStyleCastParentheses: false
-SpaceAfterCStyleCast: false
-SpacesInContainerLiterals: true
-SpaceBeforeAssignmentOperators: true
-ContinuationIndentWidth: 3
-CommentPragmas: '^ IWYU pragma:'
-ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
-SpaceBeforeParens: ControlStatements
-DisableFormat: false
-...
-
diff --git a/src/gallium/drivers/swr/meson.build b/src/gallium/drivers/swr/meson.build
deleted file mode 100644
index ac712d80461..00000000000
--- a/src/gallium/drivers/swr/meson.build
+++ /dev/null
@@ -1,411 +0,0 @@
-# Copyright © 2017-2020 Intel Corporation
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-files_swr_common = files(
- 'rasterizer/common/formats.cpp',
- 'rasterizer/common/formats.h',
- 'rasterizer/common/intrin.h',
- 'rasterizer/common/isa.hpp',
- 'rasterizer/common/os.cpp',
- 'rasterizer/common/os.h',
- 'rasterizer/common/rdtsc_buckets.cpp',
- 'rasterizer/common/rdtsc_buckets.h',
- 'rasterizer/common/rdtsc_buckets_shared.h',
- 'rasterizer/common/rdtsc_buckets_shared.h',
- 'rasterizer/common/simd16intrin.h',
- 'rasterizer/common/simdintrin.h',
- 'rasterizer/common/simdlib.hpp',
- 'rasterizer/common/simdlib_interface.hpp',
- 'rasterizer/common/simdlib_types.hpp',
- 'rasterizer/common/swr_assert.cpp',
- 'rasterizer/common/swr_assert.h',
-)
-
-files_swr_mesa = files(
- 'swr_loader.cpp',
- 'swr_clear.cpp',
- 'swr_context.cpp',
- 'swr_context.h',
- 'swr_draw.cpp',
- 'swr_public.h',
- 'swr_resource.h',
- 'swr_screen.cpp',
- 'swr_screen.h',
- 'swr_state.cpp',
- 'swr_state.h',
- 'swr_tex_sample.cpp',
- 'swr_tex_sample.h',
- 'swr_scratch.h',
- 'swr_scratch.cpp',
- 'swr_shader.cpp',
- 'swr_shader.h',
- 'swr_memory.h',
- 'swr_fence.h',
- 'swr_fence.cpp',
- 'swr_fence_work.h',
- 'swr_fence_work.cpp',
- 'swr_query.h',
- 'swr_query.cpp',
- 'rasterizer/jitter/blend_jit.cpp',
- 'rasterizer/jitter/blend_jit.h',
- 'rasterizer/jitter/builder.cpp',
- 'rasterizer/jitter/builder.h',
- 'rasterizer/jitter/builder_math.h',
- 'rasterizer/jitter/builder_mem.cpp',
- 'rasterizer/jitter/builder_mem.h',
- 'rasterizer/jitter/builder_gfx_mem.cpp',
- 'rasterizer/jitter/builder_gfx_mem.h',
- 'rasterizer/jitter/builder_misc.cpp',
- 'rasterizer/jitter/builder_misc.h',
- 'rasterizer/jitter/fetch_jit.cpp',
- 'rasterizer/jitter/fetch_jit.h',
- 'rasterizer/jitter/jit_api.h',
- 'rasterizer/jitter/JitManager.cpp',
- 'rasterizer/jitter/JitManager.h',
- 'rasterizer/jitter/streamout_jit.cpp',
- 'rasterizer/jitter/streamout_jit.h',
- 'rasterizer/jitter/shader_lib/DebugOutput.cpp',
- 'rasterizer/jitter/shader_lib/Scatter.cpp',
- 'rasterizer/jitter/functionpasses/lower_x86.cpp',
- 'rasterizer/memory/SurfaceState.h'
-)
-
-files_swr_arch = files(
- 'rasterizer/archrast/archrast.cpp',
- 'rasterizer/archrast/archrast.h',
- 'rasterizer/archrast/eventmanager.h',
- 'rasterizer/core/api.cpp',
- 'rasterizer/core/api.h',
- 'rasterizer/core/arena.h',
- 'rasterizer/core/backend.cpp',
- 'rasterizer/core/backend_clear.cpp',
- 'rasterizer/core/backend_sample.cpp',
- 'rasterizer/core/backend_singlesample.cpp',
- 'rasterizer/core/backend.h',
- 'rasterizer/core/backend_impl.h',
- 'rasterizer/core/binner.cpp',
- 'rasterizer/core/binner.h',
- 'rasterizer/core/blend.h',
- 'rasterizer/core/clip.cpp',
- 'rasterizer/core/clip.h',
- 'rasterizer/core/conservativeRast.h',
- 'rasterizer/core/context.h',
- 'rasterizer/core/depthstencil.h',
- 'rasterizer/core/fifo.hpp',
- 'rasterizer/core/format_conversion.h',
- 'rasterizer/core/format_traits.h',
- 'rasterizer/core/format_types.h',
- 'rasterizer/core/format_utils.h',
- 'rasterizer/core/frontend.cpp',
- 'rasterizer/core/frontend.h',
- 'rasterizer/core/knobs.h',
- 'rasterizer/core/knobs_init.h',
- 'rasterizer/core/multisample.h',
- 'rasterizer/core/pa_avx.cpp',
- 'rasterizer/core/pa.h',
- 'rasterizer/core/rasterizer.cpp',
- 'rasterizer/core/rasterizer.h',
- 'rasterizer/core/rasterizer_impl.h',
- 'rasterizer/core/rdtsc_core.cpp',
- 'rasterizer/core/rdtsc_core.h',
- 'rasterizer/core/ringbuffer.h',
- 'rasterizer/core/state.h',
- 'rasterizer/core/state_funcs.h',
- 'rasterizer/core/tessellator.h',
- 'rasterizer/core/tessellator.hpp',
- 'rasterizer/core/tessellator.cpp',
- 'rasterizer/core/threads.cpp',
- 'rasterizer/core/threads.h',
- 'rasterizer/core/tilemgr.cpp',
- 'rasterizer/core/tilemgr.h',
- 'rasterizer/core/tileset.h',
- 'rasterizer/core/utils.h',
- 'rasterizer/memory/ClearTile.cpp',
- 'rasterizer/memory/Convert.h',
- 'rasterizer/memory/LoadTile.cpp',
- 'rasterizer/memory/LoadTile.h',
- 'rasterizer/memory/LoadTile_Linear.cpp',
- 'rasterizer/memory/LoadTile_TileX.cpp',
- 'rasterizer/memory/LoadTile_TileY.cpp',
- 'rasterizer/memory/StoreTile.cpp',
- 'rasterizer/memory/StoreTile.h',
- 'rasterizer/memory/StoreTile_Linear2.cpp',
- 'rasterizer/memory/StoreTile_Linear.cpp',
- 'rasterizer/memory/StoreTile_TileW.cpp',
- 'rasterizer/memory/StoreTile_TileX2.cpp',
- 'rasterizer/memory/StoreTile_TileX.cpp',
- 'rasterizer/memory/StoreTile_TileY2.cpp',
- 'rasterizer/memory/StoreTile_TileY.cpp',
- 'rasterizer/memory/TilingFunctions.h',
- 'rasterizer/memory/tilingtraits.h',
- 'rasterizer/memory/InitMemory.h',
- 'rasterizer/memory/InitMemory.cpp',
- 'rasterizer/memory/SurfaceState.h'
-)
-
-swr_context_files = files('swr_context.h')
-swr_state_files = files('rasterizer/core/state.h')
-swr_surf_state_files = files('rasterizer/memory/SurfaceState.h')
-swr_event_proto_files = files('rasterizer/archrast/events.proto')
-swr_event_pproto_files = files('rasterizer/archrast/events_private.proto')
-swr_gen_backend_files = files('rasterizer/codegen/templates/gen_backend.cpp')
-swr_gen_rasterizer_files = files('rasterizer/codegen/templates/gen_rasterizer.cpp')
-swr_gen_header_init_files = files('rasterizer/codegen/templates/gen_header_init.hpp')
-
-swr_gen_llvm_ir_macros_py = files('rasterizer/codegen/gen_llvm_ir_macros.py')
-swr_gen_backends_py = files('rasterizer/codegen/gen_backends.py')
-
-swr_gen_builder_depends = files(
- 'rasterizer/codegen/templates/gen_builder.hpp',
- 'rasterizer/codegen/gen_common.py'
- )
-
-
-subdir('rasterizer/jitter')
-subdir('rasterizer/codegen')
-subdir('rasterizer/core/backends')
-
-swr_incs = include_directories(
- 'rasterizer/codegen', 'rasterizer/core', 'rasterizer/jitter',
- 'rasterizer/archrast', 'rasterizer',
-)
-
-swr_cpp_args = []
-if cpp.has_argument('-fno-strict-aliasing')
- swr_cpp_args += '-fno-strict-aliasing'
-endif
-if cpp.has_argument('-Wno-aligned-new')
- swr_cpp_args += '-Wno-aligned-new'
-endif
-
-
-swr_arch_libs = []
-swr_defines = []
-
-swr_avx_args = cpp.first_supported_argument(
- '-target-cpu=sandybridge', '-mavx', '-march=core-avx', '-tp=sandybridge',
- '/arch:AVX',
-)
-if swr_avx_args == []
- error('Cannot find AVX support for swr. (these are required for SWR an all architectures.)')
-endif
-
-shared_swr = get_option('shared-swr')
-if not shared_swr
- if with_swr_arches.length() > 1
- error('When SWR is linked statically only one architecture is allowed.')
- endif
- swr_defines += '-DHAVE_SWR_BUILTIN'
-endif
-
-if with_swr_arches.contains('skx')
- swr_skx_args = cpp.first_supported_argument(
- '-march=skylake-avx512', '-target-cpu=x86-skylake', '-xCORE-AVX512',
- )
- if swr_skx_args == []
- error('Cannot find SKX support for swr.')
- endif
-
- swr_defines += '-DHAVE_SWR_SKX'
- if shared_swr
- swr_arch_libs += shared_library(
- 'swrSKX',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_skx_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX512',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- version : '0.0.0',
- soversion : host_machine.system() == 'windows' ? '' : '0',
- install : true,
- name_prefix : host_machine.system() == 'windows' ? '' : 'lib',
- )
- else
- swr_arch_libs += static_library(
- 'swrSKX',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_skx_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX512',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- )
- endif
-endif
-
-if with_swr_arches.contains('knl')
- swr_knl_args = cpp.first_supported_argument(
- '-march=knl', '-target-cpu=mic-knl', '-xMIC-AVX512',
- )
- if swr_knl_args == []
- error('Cannot find KNL support for swr.')
- endif
-
- swr_defines += '-DHAVE_SWR_KNL'
- if shared_swr
- swr_arch_libs += shared_library(
- 'swrKNL',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_knl_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX512', '-DSIMD_ARCH_KNIGHTS',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- version : '0.0.0',
- soversion : host_machine.system() == 'windows' ? '' : '0',
- install : true,
- name_prefix : host_machine.system() == 'windows' ? '' : 'lib',
- )
- else
- swr_arch_libs += static_library(
- 'swrKNL',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_knl_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX512', '-DSIMD_ARCH_KNIGHTS',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- )
- endif
-endif
-
-
-if with_swr_arches.contains('avx2')
- swr_avx2_args = cpp.first_supported_argument(
- '-target-cpu=haswell', '-march=core-avx2', '-tp=haswell', '/arch:AVX2',
- )
- if swr_avx2_args == []
- if cpp.has_argument(['-mavx2', '-mfma', '-mbmi2', '-mf16c'])
- swr_avx2_args = ['-mavx2', '-mfma', '-mbmi2', '-mf16c']
- else
- error('Cannot find AVX2 support for swr.')
- endif
- endif
-
- swr_defines += '-DHAVE_SWR_AVX2'
- if shared_swr
- swr_arch_libs += shared_library(
- 'swrAVX2',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_avx2_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX2',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- version : '0.0.0',
- soversion : host_machine.system() == 'windows' ? '' : '0',
- install : true,
- name_prefix : host_machine.system() == 'windows' ? '' : 'lib',
- )
- else
- swr_arch_libs += static_library(
- 'swrAVX2',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_avx2_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX2',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- )
- endif
-endif
-
-if with_swr_arches.contains('avx')
- swr_defines += '-DHAVE_SWR_AVX'
- if shared_swr
- swr_arch_libs += shared_library(
- 'swrAVX',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_avx_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- version : '0.0.0',
- soversion : host_machine.system() == 'windows' ? '' : '0',
- install : true,
- name_prefix : host_machine.system() == 'windows' ? '' : 'lib',
- )
- else
- swr_arch_libs += static_library(
- 'swrAVX',
- [files_swr_common, files_swr_arch],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_avx_args,
- '-DKNOB_ARCH=KNOB_ARCH_AVX',
- ],
- gnu_symbol_visibility : 'hidden',
- link_args : [ld_args_gc_sections],
- include_directories : [swr_incs],
- dependencies : [dep_thread, dep_llvm],
- )
- endif
-endif
-
-
-if swr_arch_libs == []
- error('SWR configured, but no SWR architectures configured')
-endif
-
-# The swr_avx_args are needed for intrensic usage in swr api headers.
-libmesaswr = static_library(
- 'mesaswr',
- [files_swr_mesa, files_swr_common, gen_knobs_h, gen_knobs_cpp,
- gen_builder_hpp, gen_builder_meta_hpp, gen_builder_intrin_hpp],
- cpp_args : [
- cpp_msvc_compat_args, swr_cpp_args, swr_avx_args,
- swr_defines,
- ],
- gnu_symbol_visibility : 'hidden',
- include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, swr_incs],
- dependencies : [dep_llvm, idep_mesautil],
-)
-
-link_libs = [libmesaswr]
-if not shared_swr
- link_libs += swr_arch_libs
-endif
-
-driver_swr = declare_dependency(
- compile_args : '-DGALLIUM_SWR',
- link_with : link_libs
-)
diff --git a/src/gallium/drivers/swr/rasterizer/.dir-locals.el b/src/gallium/drivers/swr/rasterizer/.dir-locals.el
deleted file mode 100644
index 2b04c18a9bb..00000000000
--- a/src/gallium/drivers/swr/rasterizer/.dir-locals.el
+++ /dev/null
@@ -1,8 +0,0 @@
-((prog-mode
- (c-basic-offset . 4)
- (c-file-style . "k&r")
- (fill-column . 78)
- (indent-tabs-mode . nil)
- (show-trailing-whitespace . t)
- )
- )
diff --git a/src/gallium/drivers/swr/rasterizer/_clang-format b/src/gallium/drivers/swr/rasterizer/_clang-format
deleted file mode 100644
index ed4b9b409d8..00000000000
--- a/src/gallium/drivers/swr/rasterizer/_clang-format
+++ /dev/null
@@ -1,114 +0,0 @@
----
-Language: Cpp
-# BasedOnStyle: LLVM
-AccessModifierOffset: -4
-AlignAfterOpenBracket: Align
-AlignConsecutiveAssignments: true
-AlignConsecutiveDeclarations: true
-AlignEscapedNewlines: Left
-AlignOperands: true
-AlignTrailingComments: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: false
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortFunctionsOnASingleLine: Inline
-AllowShortIfStatementsOnASingleLine: false
-AllowShortLoopsOnASingleLine: false
-AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakAfterReturnType: None
-AlwaysBreakBeforeMultilineStrings: false
-AlwaysBreakTemplateDeclarations: true
-BinPackArguments: false
-BinPackParameters: false
-BraceWrapping:
- AfterClass: true
- AfterControlStatement: true
- AfterEnum: true
- AfterFunction: true
- AfterNamespace: true
- AfterObjCDeclaration: true
- AfterStruct: true
- AfterUnion: true
- #AfterExternBlock: false
- BeforeCatch: true
- BeforeElse: true
- IndentBraces: false
- SplitEmptyFunction: true
- SplitEmptyRecord: true
- SplitEmptyNamespace: true
-BreakBeforeBinaryOperators: None
-BreakBeforeBraces: Custom
-BreakBeforeInheritanceComma: false
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: false
-BreakConstructorInitializers: AfterColon
-BreakAfterJavaFieldAnnotations: false
-BreakStringLiterals: true
-ColumnLimit: 100
-CommentPragmas: '^ IWYU pragma:'
-CompactNamespaces: false
-ConstructorInitializerAllOnOneLineOrOnePerLine: false
-ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
-Cpp11BracedListStyle: true
-DerivePointerAlignment: false
-DisableFormat: false
-ExperimentalAutoDetectBinPacking: false
-FixNamespaceComments: true
-ForEachMacros:
- - foreach
- - Q_FOREACH
- - BOOST_FOREACH
-#IncludeBlocks: Preserve
-IncludeCategories:
- - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
- Priority: 2
- - Regex: '^(<|"(gtest|gmock|isl|json)/)'
- Priority: 3
- - Regex: '.*'
- Priority: 1
-IncludeIsMainRegex: '(Test)?$'
-IndentCaseLabels: false
-#IndentPPDirectives: AfterHash
-IndentWidth: 4
-IndentWrappedFunctionNames: false
-JavaScriptQuotes: Leave
-JavaScriptWrapImports: true
-KeepEmptyLinesAtTheStartOfBlocks: false
-MacroBlockBegin: ''
-MacroBlockEnd: ''
-MaxEmptyLinesToKeep: 1
-NamespaceIndentation: All
-ObjCBlockIndentWidth: 4
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: true
-PenaltyBreakAssignment: 2
-PenaltyBreakBeforeFirstCallParameter: 19
-PenaltyBreakComment: 300
-PenaltyBreakFirstLessLess: 120
-PenaltyBreakString: 1000
-PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 60
-PointerAlignment: Left
-#RawStringFormats:
-# - Delimiter: pb
-# Language: TextProto
-# BasedOnStyle: google
-ReflowComments: true
-SortIncludes: false
-SortUsingDeclarations: true
-SpaceAfterCStyleCast: false
-SpaceAfterTemplateKeyword: true
-SpaceBeforeAssignmentOperators: true
-SpaceBeforeParens: ControlStatements
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 1
-SpacesInAngles: false
-SpacesInContainerLiterals: true
-SpacesInCStyleCastParentheses: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-Standard: Cpp11
-TabWidth: 4
-UseTab: Never
-...
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
deleted file mode 100644
index bcdc6d01358..00000000000
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
+++ /dev/null
@@ -1,708 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file archrast.cpp
- *
- * @brief Implementation for archrast.
- *
- ******************************************************************************/
-#include <sys/stat.h>
-
-#include <atomic>
-#include <map>
-
-#include "common/os.h"
-#include "archrast/archrast.h"
-#include "archrast/eventmanager.h"
-#include "gen_ar_event.hpp"
-#include "gen_ar_eventhandlerfile.hpp"
-
-namespace ArchRast
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Pass/fail tallies for the depth (Z) and stencil tests, tracked
- ///        separately for the early and late variants of each test.
- ///        Every counter starts at zero.
- struct DepthStencilStats
- {
-     uint32_t earlyZTestPassCount{0};
-     uint32_t earlyZTestFailCount{0};
-     uint32_t lateZTestPassCount{0};
-     uint32_t lateZTestFailCount{0};
-     uint32_t earlyStencilTestPassCount{0};
-     uint32_t earlyStencilTestFailCount{0};
-     uint32_t lateStencilTestPassCount{0};
-     uint32_t lateStencilTestFailCount{0};
- };
-
- /// @brief Clipper tallies accumulated from ClipInfoEvent.
- ///        Members are zero-initialized, like the other stats structs in
- ///        this file, so a default-constructed instance is well defined.
- struct CStats
- {
-     uint32_t trivialRejectCount = 0;
-     uint32_t trivialAcceptCount = 0;
-     uint32_t mustClipCount      = 0;
- };
-
- /// @brief Tessellation tally accumulated from TessPrimCount events.
- struct TEStats
- {
-     //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If that holds, it's fine.
-     uint32_t inputPrims{0};
- };
-
- /// @brief Geometry shader tallies accumulated from GSPrimInfo events.
- ///        Members are zero-initialized, like the other stats structs in
- ///        this file, so a default-constructed instance is well defined.
- struct GSStateInfo
- {
-     uint32_t inputPrimCount     = 0;
-     uint32_t primGeneratedCount = 0;
-     uint32_t vertsInput         = 0;
- };
-
- /// @brief Rasterizer tally accumulated from RasterTileCount events.
- struct RastStats
- {
-     uint32_t rasterTiles{0};
- };
-
- /// @brief Primitive culling tallies accumulated from CullInfoEvent.
- struct CullStats
- {
-     uint32_t degeneratePrimCount{0};
-     uint32_t backfacePrimCount{0};
- };
-
- /// @brief Alpha test / alpha blend tallies accumulated from AlphaInfoEvent.
- struct AlphaStats
- {
-     uint32_t alphaTestCount{0};
-     uint32_t alphaBlendCount{0};
- };
-
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Event handler that handles API thread events. This is shared
- ///        between the API and its caller (e.g. driver shim) but typically
- ///        there is only a single API thread per context. So you can save
- ///        information in the class to be used for other events.
- class EventHandlerApiStats : public EventHandlerFile
- {
- public:
-     /// @brief Forwards the id to EventHandlerFile. On Windows it also makes a
-     ///        best-effort attempt to copy events.proto next to the output.
-     /// @param id - identifier passed through to the EventHandlerFile base.
-     EventHandlerApiStats(uint32_t id) : EventHandlerFile(id)
-     {
-#if defined(_WIN32)
-         // Attempt to copy the events.proto file to the ArchRast output dir. It's common for
-         // tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it
-         // exists, this will attempt to copy it the first time we get here to package it with
-         // the stats. Otherwise, the user would need to specify the events.proto location when
-         // parsing the stats in post.
-         std::stringstream eventsProtoSrcFilename, eventsProtoDstFilename;
-         eventsProtoSrcFilename << KNOB_DEBUG_OUTPUT_DIR << "\\events.proto" << std::ends;
-         eventsProtoDstFilename << mOutputDir.substr(0, mOutputDir.size() - 1)
-                                << "\\events.proto" << std::ends;
-
-         struct stat buf; // Use a Posix stat for file existence check
-
-         // stat() returns 0 when the path exists. Copy only when the destination is
-         // missing and the source is present. (The previous spelling,
-         // `!stat(...) == 0`, parsed as `(!stat(...)) == 0` and only worked by accident.)
-         const bool dstMissing = (stat(eventsProtoDstFilename.str().c_str(), &buf) != 0);
-         if (dstMissing && stat(eventsProtoSrcFilename.str().c_str(), &buf) == 0)
-         {
-             std::ifstream srcFile;
-             srcFile.open(eventsProtoSrcFilename.str().c_str(), std::ios::binary);
-             if (srcFile.is_open())
-             {
-                 // Just do a binary buffer copy
-                 std::ofstream dstFile;
-                 dstFile.open(eventsProtoDstFilename.str().c_str(), std::ios::binary);
-                 if (dstFile.is_open())
-                 {
-                     dstFile << srcFile.rdbuf();
-                     dstFile.close();
-                 }
-             }
-             srcFile.close();
-         }
-#endif
-     }
-
-     /// @brief Repackages a non-indexed instanced draw as a DrawInfoEvent
-     ///        (index count and index offset are reported as 0) and passes
-     ///        it to the EventHandlerFile base handler.
-     virtual void Handle(const DrawInstancedEvent& event)
-     {
-         DrawInfoEvent e(event.data.drawId,
-                         ArchRast::Instanced,
-                         event.data.topology,
-                         event.data.numVertices,
-                         0,
-                         0,
-                         event.data.startVertex,
-                         event.data.numInstances,
-                         event.data.startInstance,
-                         event.data.tsEnable,
-                         event.data.gsEnable,
-                         event.data.soEnable,
-                         event.data.soTopology,
-                         event.data.splitId);
-
-         EventHandlerFile::Handle(e);
-     }
-
-     /// @brief Repackages an indexed instanced draw as a DrawInfoEvent
-     ///        (vertex count is reported as 0, baseVertex fills the start
-     ///        vertex slot) and passes it to the EventHandlerFile base handler.
-     virtual void Handle(const DrawIndexedInstancedEvent& event)
-     {
-         DrawInfoEvent e(event.data.drawId,
-                         ArchRast::IndexedInstanced,
-                         event.data.topology,
-                         0,
-                         event.data.numIndices,
-                         event.data.indexOffset,
-                         event.data.baseVertex,
-                         event.data.numInstances,
-                         event.data.startInstance,
-                         event.data.tsEnable,
-                         event.data.gsEnable,
-                         event.data.soEnable,
-                         event.data.soTopology,
-                         event.data.splitId);
-
-         EventHandlerFile::Handle(e);
-     }
- };
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Event handler that handles worker thread events. There is one
- /// event handler per thread. The python script will need to sum
- /// up counters across all of the threads.
- class EventHandlerWorkerStats : public EventHandlerFile
- {
- public:
- /// @brief Starts with no pending flush and all per-shader counters cleared.
- /// @param id - identifier passed through to the EventHandlerFile base.
- EventHandlerWorkerStats(uint32_t id) : EventHandlerFile(id), mNeedFlush(false)
- {
-     // Zero the whole per-shader-type stats array in one go.
-     memset(&mShaderStats[0], 0, sizeof mShaderStats);
- }
-
- /// @brief Accumulates early depth/stencil results of a single-sample event
- ///        into both the single-sample and the combined counters.
- ///        A bit counts as failed when it is set in coverageMask but clear
- ///        in the corresponding pass mask.
- virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
- {
-     // Bitwise complement (~) is required: logical not (!) collapses the whole
-     // mask to 0 or 1, which under-counts failures to at most one per event.
-     const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
-     const uint32_t zFail =
-         _mm_popcnt_u32(~event.data.depthPassMask & event.data.coverageMask);
-     const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
-     const uint32_t stencilFail =
-         _mm_popcnt_u32(~event.data.stencilPassMask & event.data.coverageMask);
-
-     // single sample only
-     mDSSingleSample.earlyZTestPassCount += zPass;
-     mDSSingleSample.earlyZTestFailCount += zFail;
-     mDSSingleSample.earlyStencilTestPassCount += stencilPass;
-     mDSSingleSample.earlyStencilTestFailCount += stencilFail;
-
-     // single and multi sample combined
-     mDSCombined.earlyZTestPassCount += zPass;
-     mDSCombined.earlyZTestFailCount += zFail;
-     mDSCombined.earlyStencilTestPassCount += stencilPass;
-     mDSCombined.earlyStencilTestFailCount += stencilFail;
-
-     mNeedFlush = true;
- }
-
- /// @brief Accumulates early depth/stencil results of a sample-rate event
- ///        into both the sample-rate and the combined counters.
- ///        A bit counts as failed when it is set in coverageMask but clear
- ///        in the corresponding pass mask.
- virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
- {
-     // Bitwise complement (~) is required: logical not (!) collapses the whole
-     // mask to 0 or 1, which under-counts failures to at most one per event.
-     const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
-     const uint32_t zFail =
-         _mm_popcnt_u32(~event.data.depthPassMask & event.data.coverageMask);
-     const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
-     const uint32_t stencilFail =
-         _mm_popcnt_u32(~event.data.stencilPassMask & event.data.coverageMask);
-
-     // sample rate only
-     mDSSampleRate.earlyZTestPassCount += zPass;
-     mDSSampleRate.earlyZTestFailCount += zFail;
-     mDSSampleRate.earlyStencilTestPassCount += stencilPass;
-     mDSSampleRate.earlyStencilTestFailCount += stencilFail;
-
-     // single and multi sample combined
-     mDSCombined.earlyZTestPassCount += zPass;
-     mDSCombined.earlyZTestFailCount += zFail;
-     mDSCombined.earlyStencilTestPassCount += stencilPass;
-     mDSCombined.earlyStencilTestFailCount += stencilFail;
-
-     mNeedFlush = true;
- }
-
- /// @brief Accumulates early depth/stencil results of a NullPS event into
- ///        the NullPS counters (the combined counters are not touched).
- ///        A bit counts as failed when it is set in coverageMask but clear
- ///        in the corresponding pass mask.
- virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
- {
-     // Bitwise complement (~) is required: logical not (!) collapses the whole
-     // mask to 0 or 1, which under-counts failures to at most one per event.
-     mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
-     mDSNullPS.earlyZTestFailCount +=
-         _mm_popcnt_u32(~event.data.depthPassMask & event.data.coverageMask);
-
-     mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
-     mDSNullPS.earlyStencilTestFailCount +=
-         _mm_popcnt_u32(~event.data.stencilPassMask & event.data.coverageMask);
-
-     mNeedFlush = true;
- }
-
- /// @brief Accumulates late depth/stencil results of a single-sample event
- ///        into both the single-sample and the combined counters.
- ///        A bit counts as failed when it is set in coverageMask but clear
- ///        in the corresponding pass mask.
- virtual void Handle(const LateDepthStencilInfoSingleSample& event)
- {
-     // Bitwise complement (~) is required: logical not (!) collapses the whole
-     // mask to 0 or 1, which under-counts failures to at most one per event.
-     const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
-     const uint32_t zFail =
-         _mm_popcnt_u32(~event.data.depthPassMask & event.data.coverageMask);
-     const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
-     const uint32_t stencilFail =
-         _mm_popcnt_u32(~event.data.stencilPassMask & event.data.coverageMask);
-
-     // single sample only
-     mDSSingleSample.lateZTestPassCount += zPass;
-     mDSSingleSample.lateZTestFailCount += zFail;
-     mDSSingleSample.lateStencilTestPassCount += stencilPass;
-     mDSSingleSample.lateStencilTestFailCount += stencilFail;
-
-     // single and multi sample combined
-     mDSCombined.lateZTestPassCount += zPass;
-     mDSCombined.lateZTestFailCount += zFail;
-     mDSCombined.lateStencilTestPassCount += stencilPass;
-     mDSCombined.lateStencilTestFailCount += stencilFail;
-
-     mNeedFlush = true;
- }
-
- /// @brief Accumulates late depth/stencil results of a sample-rate event
- ///        into both the sample-rate and the combined counters.
- ///        A bit counts as failed when it is set in coverageMask but clear
- ///        in the corresponding pass mask.
- virtual void Handle(const LateDepthStencilInfoSampleRate& event)
- {
-     // Bitwise complement (~) is required: logical not (!) collapses the whole
-     // mask to 0 or 1, which under-counts failures to at most one per event.
-     const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
-     const uint32_t zFail =
-         _mm_popcnt_u32(~event.data.depthPassMask & event.data.coverageMask);
-     const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
-     const uint32_t stencilFail =
-         _mm_popcnt_u32(~event.data.stencilPassMask & event.data.coverageMask);
-
-     // sample rate only
-     mDSSampleRate.lateZTestPassCount += zPass;
-     mDSSampleRate.lateZTestFailCount += zFail;
-     mDSSampleRate.lateStencilTestPassCount += stencilPass;
-     mDSSampleRate.lateStencilTestFailCount += stencilFail;
-
-     // single and multi sample combined
-     mDSCombined.lateZTestPassCount += zPass;
-     mDSCombined.lateZTestFailCount += zFail;
-     mDSCombined.lateStencilTestPassCount += stencilPass;
-     mDSCombined.lateStencilTestFailCount += stencilFail;
-
-     mNeedFlush = true;
- }
-
- /// @brief Accumulates late depth/stencil results of a NullPS event into
- ///        the NullPS counters (the combined counters are not touched).
- ///        A bit counts as failed when it is set in coverageMask but clear
- ///        in the corresponding pass mask.
- virtual void Handle(const LateDepthStencilInfoNullPS& event)
- {
-     // Bitwise complement (~) is required: logical not (!) collapses the whole
-     // mask to 0 or 1, which under-counts failures to at most one per event.
-     mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
-     mDSNullPS.lateZTestFailCount +=
-         _mm_popcnt_u32(~event.data.depthPassMask & event.data.coverageMask);
-
-     mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
-     mDSNullPS.lateStencilTestFailCount +=
-         _mm_popcnt_u32(~event.data.stencilPassMask & event.data.coverageMask);
-
-     mNeedFlush = true;
- }
-
- /// @brief Accumulates pixel-rate early depth results: the pass count comes
- ///        straight from the event, the fail count is the number of active
- ///        lanes minus the pass count.
- virtual void Handle(const EarlyDepthInfoPixelRate& event)
- {
-     const uint32_t failed =
-         _mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount;
-
-     mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
-     mDSPixelRate.earlyZTestFailCount += failed;
-     mNeedFlush = true;
- }
-
-
- /// @brief Accumulates pixel-rate late depth results: the pass count comes
- ///        straight from the event, the fail count is the number of active
- ///        lanes minus the pass count.
- virtual void Handle(const LateDepthInfoPixelRate& event)
- {
-     const uint32_t failed =
-         _mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount;
-
-     mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
-     mDSPixelRate.lateZTestFailCount += failed;
-     mNeedFlush = true;
- }
-
-
- /// @brief Accumulates clipper tallies from the event's masks:
- ///        must-clip      = bits set in clipMask,
- ///        trivial reject = numInvocations minus bits set in validMask,
- ///        trivial accept = bits set in validMask but not in clipMask.
- virtual void Handle(const ClipInfoEvent& event)
- {
-     const uint32_t validCount = _mm_popcnt_u32(event.data.validMask);
-     const uint32_t acceptedCount =
-         _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
-
-     mClipper.mustClipCount += _mm_popcnt_u32(event.data.clipMask);
-     mClipper.trivialRejectCount += event.data.numInvocations - validCount;
-     mClipper.trivialAcceptCount += acceptedCount;
- }
-
- /// @brief Adds the instruction, sample, gather4 and LOD counters of
- ///        *pStatUpdate onto the matching counters of *pStatTotals.
- /// @param pStatTotals - running totals, modified in place.
- /// @param pStatUpdate - counters to add; not modified.
- void UpdateStats(SWR_SHADER_STATS* pStatTotals, const SWR_SHADER_STATS* pStatUpdate)
- {
-     SWR_SHADER_STATS&       totals = *pStatTotals;
-     const SWR_SHADER_STATS& delta  = *pStatUpdate;
-
-     totals.numInstExecuted += delta.numInstExecuted;
-     totals.numSampleExecuted += delta.numSampleExecuted;
-     totals.numSampleLExecuted += delta.numSampleLExecuted;
-     totals.numSampleBExecuted += delta.numSampleBExecuted;
-     totals.numSampleCExecuted += delta.numSampleCExecuted;
-     totals.numSampleCLZExecuted += delta.numSampleCLZExecuted;
-     totals.numSampleCDExecuted += delta.numSampleCDExecuted;
-     totals.numGather4Executed += delta.numGather4Executed;
-     totals.numGather4CExecuted += delta.numGather4CExecuted;
-     totals.numGather4CPOExecuted += delta.numGather4CPOExecuted;
-     totals.numGather4CPOCExecuted += delta.numGather4CPOCExecuted;
-     totals.numLodExecuted += delta.numLodExecuted;
- }
-
- /// @brief Adds the stats referenced by the event's hStats handle to the
- ///        vertex shader totals.
- virtual void Handle(const VSStats& event)
- {
-     UpdateStats(&mShaderStats[SHADER_VERTEX], (SWR_SHADER_STATS*)event.data.hStats);
- }
-
- /// @brief Adds the stats referenced by the event's hStats handle to the
- ///        geometry shader totals.
- virtual void Handle(const GSStats& event)
- {
-     UpdateStats(&mShaderStats[SHADER_GEOMETRY], (SWR_SHADER_STATS*)event.data.hStats);
- }
-
- /// @brief Adds the stats referenced by the event's hStats handle to the
- ///        domain shader totals.
- virtual void Handle(const DSStats& event)
- {
-     UpdateStats(&mShaderStats[SHADER_DOMAIN], (SWR_SHADER_STATS*)event.data.hStats);
- }
-
- /// @brief Adds the stats referenced by the event's hStats handle to the
- ///        hull shader totals.
- virtual void Handle(const HSStats& event)
- {
-     UpdateStats(&mShaderStats[SHADER_HULL], (SWR_SHADER_STATS*)event.data.hStats);
- }
-
- /// @brief Adds the stats referenced by the event's hStats handle to the
- ///        pixel shader totals and marks them as pending for FlushDraw().
- virtual void Handle(const PSStats& event)
- {
-     UpdateStats(&mShaderStats[SHADER_PIXEL], (SWR_SHADER_STATS*)event.data.hStats);
-     mNeedFlush = true;
- }
-
- /// @brief Adds the stats referenced by the event's hStats handle to the
- ///        compute shader totals and marks them as pending for FlushDraw().
- virtual void Handle(const CSStats& event)
- {
-     UpdateStats(&mShaderStats[SHADER_COMPUTE], (SWR_SHADER_STATS*)event.data.hStats);
-     mNeedFlush = true;
- }
-
- /// @brief Flush cached events for this draw. Does nothing unless a handler
- ///        has set mNeedFlush. Otherwise reports the cached pixel/compute
- ///        shader, depth/stencil, raster, alpha and cull tallies through
- ///        EventHandlerFile::Handle, then clears them and mNeedFlush.
- /// @param drawId - draw id stamped on every reported event.
- virtual void FlushDraw(uint32_t drawId)
- {
-     if (!mNeedFlush)
-     {
-         return;
-     }
-
-     // Shader stats
-     const SWR_SHADER_STATS& ps = mShaderStats[SHADER_PIXEL];
-     const SWR_SHADER_STATS& cs = mShaderStats[SHADER_COMPUTE];
-
-     EventHandlerFile::Handle(PSInfo(drawId,
-                                     ps.numInstExecuted,
-                                     ps.numSampleExecuted,
-                                     ps.numSampleLExecuted,
-                                     ps.numSampleBExecuted,
-                                     ps.numSampleCExecuted,
-                                     ps.numSampleCLZExecuted,
-                                     ps.numSampleCDExecuted,
-                                     ps.numGather4Executed,
-                                     ps.numGather4CExecuted,
-                                     ps.numGather4CPOExecuted,
-                                     ps.numGather4CPOCExecuted,
-                                     ps.numLodExecuted));
-     EventHandlerFile::Handle(CSInfo(drawId,
-                                     cs.numInstExecuted,
-                                     cs.numSampleExecuted,
-                                     cs.numSampleLExecuted,
-                                     cs.numSampleBExecuted,
-                                     cs.numSampleCExecuted,
-                                     cs.numSampleCLZExecuted,
-                                     cs.numSampleCDExecuted,
-                                     cs.numGather4Executed,
-                                     cs.numGather4CExecuted,
-                                     cs.numGather4CPOExecuted,
-                                     cs.numGather4CPOCExecuted,
-                                     cs.numLodExecuted));
-
-     // singleSample
-     const DepthStencilStats& single = mDSSingleSample;
-     EventHandlerFile::Handle(
-         EarlyZSingleSample(drawId, single.earlyZTestPassCount, single.earlyZTestFailCount));
-     EventHandlerFile::Handle(
-         LateZSingleSample(drawId, single.lateZTestPassCount, single.lateZTestFailCount));
-     EventHandlerFile::Handle(EarlyStencilSingleSample(
-         drawId, single.earlyStencilTestPassCount, single.earlyStencilTestFailCount));
-     EventHandlerFile::Handle(LateStencilSingleSample(
-         drawId, single.lateStencilTestPassCount, single.lateStencilTestFailCount));
-
-     // sampleRate
-     const DepthStencilStats& rate = mDSSampleRate;
-     EventHandlerFile::Handle(
-         EarlyZSampleRate(drawId, rate.earlyZTestPassCount, rate.earlyZTestFailCount));
-     EventHandlerFile::Handle(
-         LateZSampleRate(drawId, rate.lateZTestPassCount, rate.lateZTestFailCount));
-     EventHandlerFile::Handle(EarlyStencilSampleRate(
-         drawId, rate.earlyStencilTestPassCount, rate.earlyStencilTestFailCount));
-     EventHandlerFile::Handle(LateStencilSampleRate(
-         drawId, rate.lateStencilTestPassCount, rate.lateStencilTestFailCount));
-
-     // combined
-     const DepthStencilStats& combined = mDSCombined;
-     EventHandlerFile::Handle(
-         EarlyZ(drawId, combined.earlyZTestPassCount, combined.earlyZTestFailCount));
-     EventHandlerFile::Handle(
-         LateZ(drawId, combined.lateZTestPassCount, combined.lateZTestFailCount));
-     EventHandlerFile::Handle(EarlyStencil(
-         drawId, combined.earlyStencilTestPassCount, combined.earlyStencilTestFailCount));
-     EventHandlerFile::Handle(LateStencil(
-         drawId, combined.lateStencilTestPassCount, combined.lateStencilTestFailCount));
-
-     // pixelRate
-     const DepthStencilStats& pixel = mDSPixelRate;
-     EventHandlerFile::Handle(
-         EarlyZPixelRate(drawId, pixel.earlyZTestPassCount, pixel.earlyZTestFailCount));
-     EventHandlerFile::Handle(
-         LateZPixelRate(drawId, pixel.lateZTestPassCount, pixel.lateZTestFailCount));
-
-     // NullPS
-     // NOTE(review): only the early NullPS counters are reported here; the late
-     // NullPS counters are accumulated by this class but never emitted.
-     const DepthStencilStats& nullPs = mDSNullPS;
-     EventHandlerFile::Handle(
-         EarlyZNullPS(drawId, nullPs.earlyZTestPassCount, nullPs.earlyZTestFailCount));
-     EventHandlerFile::Handle(EarlyStencilNullPS(
-         drawId, nullPs.earlyStencilTestPassCount, nullPs.earlyStencilTestFailCount));
-
-     // Rasterized Subspans
-     EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
-
-     // Alpha Subspans
-     EventHandlerFile::Handle(
-         AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
-
-     // Primitive Culling (backface count first, then degenerate count)
-     EventHandlerFile::Handle(
-         CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
-
-     // Everything reported above starts from zero again for the next draw.
-     mDSSingleSample = {};
-     mDSSampleRate   = {};
-     mDSCombined     = {};
-     mDSPixelRate    = {};
-     mDSNullPS       = {};
-
-     rastStats   = {};
-     mCullStats  = {};
-     mAlphaStats = {};
-
-     mShaderStats[SHADER_PIXEL]   = {};
-     mShaderStats[SHADER_COMPUTE] = {};
-
-     mNeedFlush = false;
- }
-
- /// @brief Reports the cached clipper, tessellator, geometry shader and
- ///        VS/HS/DS/GS shader tallies for the event's draw id through
- ///        EventHandlerFile::Handle, then clears those tallies.
- virtual void Handle(const FrontendDrawEndEvent& event)
- {
-     const auto drawId = event.data.drawId;
-
-     // Clipper
-     EventHandlerFile::Handle(ClipperEvent(drawId,
-                                           mClipper.trivialRejectCount,
-                                           mClipper.trivialAcceptCount,
-                                           mClipper.mustClipCount));
-
-     // Tesselator
-     EventHandlerFile::Handle(TessPrims(drawId, mTS.inputPrims));
-
-     // Geometry Shader
-     EventHandlerFile::Handle(GSInputPrims(drawId, mGS.inputPrimCount));
-     EventHandlerFile::Handle(GSPrimsGen(drawId, mGS.primGeneratedCount));
-     EventHandlerFile::Handle(GSVertsInput(drawId, mGS.vertsInput));
-
-     // Per-stage shader stats, reported in the order VS, HS, DS, GS.
-     const SWR_SHADER_STATS& vs = mShaderStats[SHADER_VERTEX];
-     const SWR_SHADER_STATS& hs = mShaderStats[SHADER_HULL];
-     const SWR_SHADER_STATS& ds = mShaderStats[SHADER_DOMAIN];
-     const SWR_SHADER_STATS& gs = mShaderStats[SHADER_GEOMETRY];
-
-     EventHandlerFile::Handle(VSInfo(drawId,
-                                     vs.numInstExecuted,
-                                     vs.numSampleExecuted,
-                                     vs.numSampleLExecuted,
-                                     vs.numSampleBExecuted,
-                                     vs.numSampleCExecuted,
-                                     vs.numSampleCLZExecuted,
-                                     vs.numSampleCDExecuted,
-                                     vs.numGather4Executed,
-                                     vs.numGather4CExecuted,
-                                     vs.numGather4CPOExecuted,
-                                     vs.numGather4CPOCExecuted,
-                                     vs.numLodExecuted));
-     EventHandlerFile::Handle(HSInfo(drawId,
-                                     hs.numInstExecuted,
-                                     hs.numSampleExecuted,
-                                     hs.numSampleLExecuted,
-                                     hs.numSampleBExecuted,
-                                     hs.numSampleCExecuted,
-                                     hs.numSampleCLZExecuted,
-                                     hs.numSampleCDExecuted,
-                                     hs.numGather4Executed,
-                                     hs.numGather4CExecuted,
-                                     hs.numGather4CPOExecuted,
-                                     hs.numGather4CPOCExecuted,
-                                     hs.numLodExecuted));
-     EventHandlerFile::Handle(DSInfo(drawId,
-                                     ds.numInstExecuted,
-                                     ds.numSampleExecuted,
-                                     ds.numSampleLExecuted,
-                                     ds.numSampleBExecuted,
-                                     ds.numSampleCExecuted,
-                                     ds.numSampleCLZExecuted,
-                                     ds.numSampleCDExecuted,
-                                     ds.numGather4Executed,
-                                     ds.numGather4CExecuted,
-                                     ds.numGather4CPOExecuted,
-                                     ds.numGather4CPOCExecuted,
-                                     ds.numLodExecuted));
-     EventHandlerFile::Handle(GSInfo(drawId,
-                                     gs.numInstExecuted,
-                                     gs.numSampleExecuted,
-                                     gs.numSampleLExecuted,
-                                     gs.numSampleBExecuted,
-                                     gs.numSampleCExecuted,
-                                     gs.numSampleCLZExecuted,
-                                     gs.numSampleCDExecuted,
-                                     gs.numGather4Executed,
-                                     gs.numGather4CExecuted,
-                                     gs.numGather4CPOExecuted,
-                                     gs.numGather4CPOCExecuted,
-                                     gs.numLodExecuted));
-
-     mShaderStats[SHADER_VERTEX]   = {};
-     mShaderStats[SHADER_HULL]     = {};
-     mShaderStats[SHADER_DOMAIN]   = {};
-     mShaderStats[SHADER_GEOMETRY] = {};
-
-     // Reset Internal Counters
-     mClipper = {};
-     mTS      = {};
-     mGS      = {};
- }
-
- /// @brief Adds the event's geometry shader primitive/vertex counts to the
- ///        running totals in mGS.
- virtual void Handle(const GSPrimInfo& event)
- {
-     const auto& info = event.data;
-
-     mGS.inputPrimCount += info.inputPrimCount;
-     mGS.primGeneratedCount += info.primGeneratedCount;
-     mGS.vertsInput += info.vertsInput;
- }
-
- /// @brief Adds the event's primitive count to the tessellator input total.
- virtual void Handle(const TessPrimCount& event)
- {
-     mTS.inputPrims += event.data.primCount;
- }
-
- /// @brief Adds the event's raster tile count to the running total.
- ///        Note that this handler does not set mNeedFlush.
- virtual void Handle(const RasterTileCount& event)
- {
-     const auto& info = event.data;
-     rastStats.rasterTiles += info.rasterTiles;
- }
-
- /// @brief Counts bits that are set in validMask and also flagged in the
- ///        degenerate / backface masks, adding them to the cull totals.
- virtual void Handle(const CullInfoEvent& event)
- {
-     // valid ^ (valid & ~mask) keeps exactly the bits set in both valid and
-     // mask, so it is written directly as valid & mask.
-     const auto valid = event.data.validMask;
-
-     mCullStats.degeneratePrimCount += _mm_popcnt_u32(valid & event.data.degeneratePrimMask);
-     mCullStats.backfacePrimCount += _mm_popcnt_u32(valid & event.data.backfacePrimMask);
- }
-
- /// @brief Adds the event's alpha test / alpha blend enable values to the
- ///        running alpha totals.
- virtual void Handle(const AlphaInfoEvent& event)
- {
-     const auto& info = event.data;
-
-     mAlphaStats.alphaTestCount += info.alphaTestEnable;
-     mAlphaStats.alphaBlendCount += info.alphaBlendEnable;
- }
-
- protected:
- // Set by the depth/stencil, pixel-rate depth, PSStats and CSStats handlers;
- // FlushDraw() returns immediately while this is false and clears it when done.
- bool mNeedFlush;
- // Per draw stats
- DepthStencilStats mDSSingleSample = {};
- DepthStencilStats mDSSampleRate = {};
- DepthStencilStats mDSPixelRate = {};
- // Sum of the single-sample and sample-rate results.
- DepthStencilStats mDSCombined = {};
- DepthStencilStats mDSNullPS = {};
- // NOTE(review): mDSOmZ is not read or written by any method of this class;
- // possibly unused — confirm before removing.
- DepthStencilStats mDSOmZ = {};
- // Reported and reset by the FrontendDrawEndEvent handler.
- CStats mClipper = {};
- TEStats mTS = {};
- GSStateInfo mGS = {};
- // Reported and reset by FlushDraw().
- RastStats rastStats = {};
- CullStats mCullStats = {};
- AlphaStats mAlphaStats = {};
-
- // Running totals per shader type, indexed by the SHADER_* constants.
- SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES];
-
- };
-
- /// @brief Reinterprets an opaque thread-context handle as the EventManager
- ///        pointer it was created from (see CreateThreadContext).
- static EventManager* FromHandle(HANDLE hThreadContext)
- {
-     EventManager* pManager = reinterpret_cast<EventManager*>(hThreadContext);
-     return pManager;
- }
-
- /// @brief Construct an event manager and associate a handler with it.
- ///        API contexts get an EventHandlerApiStats, every other type gets an
- ///        EventHandlerWorkerStats. The matching thread-start event is passed
- ///        to the new handler, followed by MarkHeader().
- /// @param type - kind of thread the context is created for.
- /// @return the new EventManager as an opaque handle, or nullptr when no
- ///         manager was obtained.
- HANDLE CreateThreadContext(AR_THREAD type)
- {
-     // Can we assume single threaded here?
-     static std::atomic<uint32_t> counter(0);
-     const uint32_t               id = counter++;
-
-     EventManager* pManager = new EventManager();
-     if (!pManager)
-     {
-         SWR_INVALID("Failed to register thread.");
-         return nullptr;
-     }
-
-     EventHandlerFile* pHandler = nullptr;
-     if (type == AR_THREAD::API)
-     {
-         pHandler = new EventHandlerApiStats(id);
-         pManager->Attach(pHandler);
-         pHandler->Handle(ThreadStartApiEvent());
-     }
-     else
-     {
-         pHandler = new EventHandlerWorkerStats(id);
-         pManager->Attach(pHandler);
-         pHandler->Handle(ThreadStartWorkerEvent());
-     }
-
-     pHandler->MarkHeader();
-
-     return pManager;
- }
-
- /// @brief Deletes the EventManager behind the handle; the manager's
- ///        destructor in turn deletes the handlers attached to it.
- void DestroyThreadContext(HANDLE hThreadContext)
- {
-     EventManager* const pManager = FromHandle(hThreadContext);
-
-     SWR_ASSERT(pManager != nullptr);
-     delete pManager;
- }
-
- /// @brief Dispatch event for this thread. Events that report themselves as
- ///        disabled are dropped before the handle is even looked at.
- /// @param hThreadContext - handle returned by CreateThreadContext.
- /// @param event - event to hand to the thread's EventManager.
- void Dispatch(HANDLE hThreadContext, const Event& event)
- {
-     if (!event.IsEnabled())
-     {
-         return;
-     }
-
-     // Use the shared handle conversion, like DestroyThreadContext and FlushDraw.
-     EventManager* pManager = FromHandle(hThreadContext);
-     SWR_ASSERT(pManager != nullptr);
-     pManager->Dispatch(event);
- }
-
- /// @brief Flush for this thread: forwards the draw id to the FlushDraw of
- ///        the EventManager behind the handle.
- void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
- {
-     EventManager* const pManager = FromHandle(hThreadContext);
-
-     SWR_ASSERT(pManager != nullptr);
-     pManager->FlushDraw(drawId);
- }
-} // namespace ArchRast
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.h b/src/gallium/drivers/swr/rasterizer/archrast/archrast.h
deleted file mode 100644
index a247443f54b..00000000000
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file archrast.h
- *
- * @brief Definitions for archrast.
- *
- ******************************************************************************/
-#pragma once
-
-#include "common/os.h"
-#include "gen_ar_event.hpp"
-#include "eventmanager.h"
-
-namespace ArchRast
-{
-     /// @brief Kind of thread a context is created for.
-     enum class AR_THREAD
-     {
-         API    = 0,
-         WORKER = 1
-     };
-
-     /// @brief Creates an event manager with a stats handler matching @p type
-     ///        and returns it as an opaque handle (nullptr on failure).
-     HANDLE CreateThreadContext(AR_THREAD type);
-
-     /// @brief Destroys a context previously returned by CreateThreadContext.
-     void DestroyThreadContext(HANDLE hThreadContext);
-
-     // Dispatch event for this thread.
-     void Dispatch(HANDLE hThreadContext, const Event& event);
-
-     /// @brief Flushes the stats cached for @p drawId on this thread's handlers.
-     void FlushDraw(HANDLE hThreadContext, uint32_t drawId);
-} // namespace ArchRast
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h b/src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h
deleted file mode 100644
index 118a100e850..00000000000
--- a/src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file eventmanager.h
- *
- * @brief Definitions for the event manager.
- *
- ******************************************************************************/
-#pragma once
-
-#include "common/os.h"
-
-#include "gen_ar_event.hpp"
-#include "gen_ar_eventhandler.hpp"
-
-#include <vector>
-
-namespace ArchRast
-{
-     //////////////////////////////////////////////////////////////////////////
-     /// EventManager - interface to dispatch events to handlers.
-     /// Event handling occurs only on a single thread.
-     ///
-     /// The manager owns every handler attached to it and deletes them in its
-     /// destructor, so it is deliberately non-copyable: a copy would delete
-     /// the same handlers a second time.
-     //////////////////////////////////////////////////////////////////////////
-     class EventManager
-     {
-     public:
-         EventManager() {}
-
-         EventManager(const EventManager&) = delete;
-         EventManager& operator=(const EventManager&) = delete;
-
-         ~EventManager()
-         {
-             // Event manager owns destroying handler objects once attached.
-             ///@note See comment for Detach.
-             for (auto pHandler : mHandlers)
-             {
-                 delete pHandler;
-             }
-         }
-
-         /// @brief Registers a handler; ownership passes to the manager.
-         /// @param pHandler - handler to add; must not be nullptr.
-         void Attach(EventHandler* pHandler)
-         {
-             SWR_ASSERT(pHandler != nullptr);
-             mHandlers.push_back(pHandler);
-         }
-
-         /// @brief Offers the event to every attached handler, in attach order.
-         void Dispatch(const Event& event)
-         {
-             ///@todo Add event filter check here.
-
-             for (auto pHandler : mHandlers)
-             {
-                 event.Accept(pHandler);
-             }
-         }
-
-         /// @brief Calls FlushDraw(drawId) on every attached handler, in attach order.
-         void FlushDraw(uint32_t drawId)
-         {
-             for (auto pHandler : mHandlers)
-             {
-                 pHandler->FlushDraw(drawId);
-             }
-         }
-
-     private:
-         // Handlers stay registered for life
-         void Detach(EventHandler* pHandler) { SWR_INVALID("Should not be called"); }
-
-         std::vector<EventHandler*> mHandlers;
-     };
-} // namespace ArchRast
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
deleted file mode 100644
index 24739293a30..00000000000
--- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto
+++ /dev/null
@@ -1,427 +0,0 @@
-# Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-#
-# Provides definitions for events.
-
-enum AR_DRAW_TYPE
-{
- Instanced = 0,
- IndexedInstanced = 1,
- InstancedSplit = 2,
- IndexedInstancedSplit = 3
-};
-
-event Framework::ThreadStartApiEvent
-{
-};
-
-event Framework::ThreadStartWorkerEvent
-{
-};
-
-///@brief Used as a helper event to indicate end of frame. Does not guarantee to capture end of frame on all APIs
-event ApiSwr::FrameEndEvent
-{
- uint32_t frameId; // current frame id
- uint32_t nextDrawId; // next draw id (always incremental - does not reset)
-};
-
-///@brief Synchronization event.
-event ApiSwr::SwrSyncEvent
-{
- uint32_t drawId;
-};
-
-///@brief Invalidate hot tiles (i.e. tile cache)
-event ApiSwr::SwrInvalidateTilesEvent
-{
- uint32_t drawId;
-};
-
-///@brief Invalidate and discard hot tiles within pixel region
-event ApiSwr::SwrDiscardRectEvent
-{
- uint32_t drawId;
-};
-
-///@brief Flush tiles out to memory that is typically owned by driver (e.g. Flush RT cache)
-event ApiSwr::SwrStoreTilesEvent
-{
- uint32_t drawId;
-};
-
-event PipelineStats::DrawInfoEvent
-{
- uint32_t drawId;
- AR_DRAW_TYPE type; // type of draw (indexed, instanced, etc)
- uint32_t topology; // topology of draw
- uint32_t numVertices; // number of vertices for draw
- uint32_t numIndices; // number of indices for draw
- int32_t indexOffset; // offset into index buffer
- int32_t baseVertex; // which vertex to start with
- uint32_t numInstances; // number of instances to draw
- uint32_t startInstance; // which instance to start fetching
- uint32_t tsEnable; // tessellation enabled
- uint32_t gsEnable; // geometry shader enabled
- uint32_t soEnable; // stream-out enabled
- uint32_t soTopology; // topology of stream-out
- uint32_t splitId; // split draw count or id
-};
-
-event PipelineStats::DispatchEvent
-{
- uint32_t drawId;
- uint32_t threadGroupCountX; // num thread groups in X dimension
- uint32_t threadGroupCountY; // num thread groups in Y dimension
- uint32_t threadGroupCountZ; // num thread groups in Z dimension
-};
-
-event PipelineStats::FrontendStatsEvent
-{
- uint32_t drawId;
- uint64_t IaVertices;
- uint64_t IaPrimitives;
- uint64_t VsInvocations;
- uint64_t HsInvocations;
- uint64_t DsInvocations;
- uint64_t GsInvocations;
- uint64_t GsPrimitives;
- uint64_t CInvocations;
- uint64_t CPrimitives;
- uint64_t SoPrimStorageNeeded0;
- uint64_t SoPrimStorageNeeded1;
- uint64_t SoPrimStorageNeeded2;
- uint64_t SoPrimStorageNeeded3;
- uint64_t SoNumPrimsWritten0;
- uint64_t SoNumPrimsWritten1;
- uint64_t SoNumPrimsWritten2;
- uint64_t SoNumPrimsWritten3;
-};
-
-event PipelineStats::BackendStatsEvent
-{
- uint32_t drawId;
- uint64_t DepthPassCount;
- uint64_t PsInvocations;
- uint64_t CsInvocations;
-
-};
-
-event PipelineStats::EarlyZSingleSample
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::LateZSingleSample
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::EarlyStencilSingleSample
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::LateStencilSingleSample
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::EarlyZSampleRate
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::LateZSampleRate
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::EarlyStencilSampleRate
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::LateStencilSampleRate
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-// Total Early-Z counts, SingleSample and SampleRate
-event PipelineStats::EarlyZ
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-// Total LateZ counts, SingleSample and SampleRate
-event PipelineStats::LateZ
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-// Total EarlyStencil counts, SingleSample and SampleRate
-event PipelineStats::EarlyStencil
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-// Total LateStencil counts, SingleSample and SampleRate
-event PipelineStats::LateStencil
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::EarlyZNullPS
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::EarlyStencilNullPS
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::EarlyZPixelRate
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::LateZPixelRate
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-
-event PipelineStats::EarlyOmZ
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::EarlyOmStencil
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::LateOmZ
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::LateOmStencil
-{
- uint32_t drawId;
- uint64_t passCount;
- uint64_t failCount;
-};
-
-event PipelineStats::GSInputPrims
-{
- uint32_t drawId;
- uint64_t inputPrimCount;
-};
-
-event PipelineStats::GSPrimsGen
-{
- uint32_t drawId;
- uint64_t primGeneratedCount;
-};
-
-event PipelineStats::GSVertsInput
-{
- uint32_t drawId;
- uint64_t vertsInput;
-};
-
-event PipelineStats::TessPrims
-{
- uint32_t drawId;
- uint64_t primCount;
-};
-
-event PipelineStats::RasterTiles
-{
- uint32_t drawId;
- uint32_t rastTileCount;
-};
-
-event PipelineStats::ClipperEvent
-{
- uint32_t drawId;
- uint32_t trivialRejectCount;
- uint32_t trivialAcceptCount;
- uint32_t mustClipCount;
-};
-
-event PipelineStats::CullEvent
-{
- uint32_t drawId;
- uint64_t backfacePrimCount;
- uint64_t degeneratePrimCount;
-};
-
-event PipelineStats::AlphaEvent
-{
- uint32_t drawId;
- uint32_t alphaTestCount;
- uint32_t alphaBlendCount;
-};
-
-event ShaderStats::VSInfo
-{
- uint32_t drawId;
- uint32_t numInstExecuted;
- uint32_t numSampleExecuted;
- uint32_t numSampleLExecuted;
- uint32_t numSampleBExecuted;
- uint32_t numSampleCExecuted;
- uint32_t numSampleCLZExecuted;
- uint32_t numSampleCDExecuted;
- uint32_t numGather4Executed;
- uint32_t numGather4CExecuted;
- uint32_t numGather4CPOExecuted;
- uint32_t numGather4CPOCExecuted;
- uint32_t numLodExecuted;
-};
-
-event ShaderStats::HSInfo
-{
- uint32_t drawId;
- uint32_t numInstExecuted;
- uint32_t numSampleExecuted;
- uint32_t numSampleLExecuted;
- uint32_t numSampleBExecuted;
- uint32_t numSampleCExecuted;
- uint32_t numSampleCLZExecuted;
- uint32_t numSampleCDExecuted;
- uint32_t numGather4Executed;
- uint32_t numGather4CExecuted;
- uint32_t numGather4CPOExecuted;
- uint32_t numGather4CPOCExecuted;
- uint32_t numLodExecuted;
-};
-
-event ShaderStats::DSInfo
-{
- uint32_t drawId;
- uint32_t numInstExecuted;
- uint32_t numSampleExecuted;
- uint32_t numSampleLExecuted;
- uint32_t numSampleBExecuted;
- uint32_t numSampleCExecuted;
- uint32_t numSampleCLZExecuted;
- uint32_t numSampleCDExecuted;
- uint32_t numGather4Executed;
- uint32_t numGather4CExecuted;
- uint32_t numGather4CPOExecuted;
- uint32_t numGather4CPOCExecuted;
- uint32_t numLodExecuted;
-};
-
-event ShaderStats::GSInfo
-{
- uint32_t drawId;
- uint32_t numInstExecuted;
- uint32_t numSampleExecuted;
- uint32_t numSampleLExecuted;
- uint32_t numSampleBExecuted;
- uint32_t numSampleCExecuted;
- uint32_t numSampleCLZExecuted;
- uint32_t numSampleCDExecuted;
- uint32_t numGather4Executed;
- uint32_t numGather4CExecuted;
- uint32_t numGather4CPOExecuted;
- uint32_t numGather4CPOCExecuted;
- uint32_t numLodExecuted;
-
-};
-
-event ShaderStats::PSInfo
-{
- uint32_t drawId;
- uint32_t numInstExecuted;
- uint32_t numSampleExecuted;
- uint32_t numSampleLExecuted;
- uint32_t numSampleBExecuted;
- uint32_t numSampleCExecuted;
- uint32_t numSampleCLZExecuted;
- uint32_t numSampleCDExecuted;
- uint32_t numGather4Executed;
- uint32_t numGather4CExecuted;
- uint32_t numGather4CPOExecuted;
- uint32_t numGather4CPOCExecuted;
- uint32_t numLodExecuted;
-};
-
-event ShaderStats::CSInfo
-{
- uint32_t drawId;
- uint32_t numInstExecuted;
- uint32_t numSampleExecuted;
- uint32_t numSampleLExecuted;
- uint32_t numSampleBExecuted;
- uint32_t numSampleCExecuted;
- uint32_t numSampleCLZExecuted;
- uint32_t numSampleCDExecuted;
- uint32_t numGather4Executed;
- uint32_t numGather4CExecuted;
- uint32_t numGather4CPOExecuted;
- uint32_t numGather4CPOCExecuted;
- uint32_t numLodExecuted;
-};
-
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
deleted file mode 100644
index b57d5c4284f..00000000000
--- a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
+++ /dev/null
@@ -1,212 +0,0 @@
-# Copyright (C) 2018 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-#
-# Provides definitions for private internal events that are only used internally
-# to rasty for communicating information between Rasty and Archrast. One goal for
-# ArchRast is to not pollute the Rasty code with lots of calculations, etc. that
-# are needed to compute per draw statistics, etc.
-
-event PipelineStats::EarlyDepthStencilInfoSingleSample
-{
- uint64_t depthPassMask;
- uint64_t stencilPassMask;
- uint64_t coverageMask;
-};
-
-event PipelineStats::EarlyDepthStencilInfoSampleRate
-{
- uint64_t depthPassMask;
- uint64_t stencilPassMask;
- uint64_t coverageMask;
-};
-
-event PipelineStats::EarlyDepthStencilInfoNullPS
-{
- uint64_t depthPassMask;
- uint64_t stencilPassMask;
- uint64_t coverageMask;
-};
-
-event PipelineStats::LateDepthStencilInfoSingleSample
-{
- uint64_t depthPassMask;
- uint64_t stencilPassMask;
- uint64_t coverageMask;
-};
-
-event PipelineStats::LateDepthStencilInfoSampleRate
-{
- uint64_t depthPassMask;
- uint64_t stencilPassMask;
- uint64_t coverageMask;
-};
-
-event PipelineStats::LateDepthStencilInfoNullPS
-{
- uint64_t depthPassMask;
- uint64_t stencilPassMask;
- uint64_t coverageMask;
-};
-
-event PipelineStats::EarlyDepthInfoPixelRate
-{
- uint64_t depthPassCount;
- uint64_t activeLanes;
-};
-
-
-event PipelineStats::LateDepthInfoPixelRate
-{
- uint64_t depthPassCount;
- uint64_t activeLanes;
-};
-
-
-event PipelineStats::BackendDrawEndEvent
-{
- uint32_t drawId;
-};
-
-event PipelineStats::FrontendDrawEndEvent
-{
- uint32_t drawId;
-};
-
-event Memory::MemoryAccessEvent
-{
- uint32_t drawId;
- uint64_t tsc;
- uint64_t ptr;
- uint32_t size;
- uint8_t isRead;
- uint8_t client;
-};
-
-event Memory::MemoryStatsEndEvent
-{
- uint32_t drawId;
-};
-
-event PipelineStats::TessPrimCount
-{
- uint64_t primCount;
-};
-
-event PipelineStats::RasterTileCount
-{
- uint32_t drawId;
- uint64_t rasterTiles;
-};
-
-event PipelineStats::GSPrimInfo
-{
- uint64_t inputPrimCount;
- uint64_t primGeneratedCount;
- uint64_t vertsInput;
-};
-
-// validMask marks the primitives that were not dropped by trivial reject or because of NaNs, i.e. the ones still being processed.
-// clipMask marks the primitives that actually have to be clipped, so a trivially accepted primitive has its validMask bit set and its clipMask bit clear.
-// Trivial reject count is numInvocations - pop_cnt32(validMask)
-// Trivial accepts are the bits in validMask & ~clipMask
-// Must clip count is pop_cnt32(clipMask)
-event PipelineStats::ClipInfoEvent
-{
- uint32_t numInvocations;
- uint32_t validMask;
- uint32_t clipMask;
-};
-
-event PipelineStats::CullInfoEvent
-{
- uint32_t drawId;
- uint64_t degeneratePrimMask;
- uint64_t backfacePrimMask;
- uint32_t validMask;
-};
-
-event PipelineStats::AlphaInfoEvent
-{
- uint32_t drawId;
- uint32_t alphaTestEnable;
- uint32_t alphaBlendEnable;
-};
-
-event PipelineStats::DrawInstancedEvent
-{
- uint32_t drawId;
- uint32_t topology;
- uint32_t numVertices;
- int32_t startVertex;
- uint32_t numInstances;
- uint32_t startInstance;
- uint32_t tsEnable;
- uint32_t gsEnable;
- uint32_t soEnable;
- uint32_t soTopology;
- uint32_t splitId; // Split draw count or id.
-};
-
-event PipelineStats::DrawIndexedInstancedEvent
-{
- uint32_t drawId;
- uint32_t topology;
- uint32_t numIndices;
- int32_t indexOffset;
- int32_t baseVertex;
- uint32_t numInstances;
- uint32_t startInstance;
- uint32_t tsEnable;
- uint32_t gsEnable;
- uint32_t soEnable;
- uint32_t soTopology;
- uint32_t splitId; // Split draw count or id.
-};
-
-event ShaderStats::VSStats
-{
- HANDLE hStats; // SWR_SHADER_STATS
-};
-
-event ShaderStats::HSStats
-{
- HANDLE hStats; // SWR_SHADER_STATS
-};
-
-event ShaderStats::DSStats
-{
- HANDLE hStats; // SWR_SHADER_STATS
-};
-
-event ShaderStats::GSStats
-{
- HANDLE hStats; // SWR_SHADER_STATS
-};
-
-event ShaderStats::PSStats
-{
- HANDLE hStats; // SWR_SHADER_STATS
-};
-
-event ShaderStats::CSStats
-{
- HANDLE hStats; // SWR_SHADER_STATS
-}; \ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py
deleted file mode 100644
index a4be675a34c..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py
+++ /dev/null
@@ -1,327 +0,0 @@
-# Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-# Python source
-import os
-import sys
-import re
-from gen_common import *
-
def parse_event_fields(lines, idx, event_dict):
    """
    Reads the field declarations of a single event from a proto file.

    Scanning starts at lines[idx] and ends after the first line that starts
    (leading whitespace aside) with "};", or when lines runs out.  The
    opening brace is not validated.  Every line shaped like
    "<type> <name>[<N>]; // comment" becomes one dict with the keys
    "type", "name", "size" (N, or 1 without an array suffix) and "desc"
    (the comment text without the slashes, or "").

    The collected list is stored in event_dict['fields'] and its length in
    event_dict['num_fields'].  Returns the index of the line following the
    last line consumed.
    """
    # ex 1: uint32_t    numSampleCLZExecuted; // number of sample_cl_z instructions executed
    # ex 2: char        reason[256]; // size of reason
    # groups: 2 = type, 4 = name, 5 = [array size], 6 = //comment
    field_re = re.compile(r'^(\s*)([\w\*]+)(\s+)([\w]+)(\[\d+\])*;\s*(\/\/.*)*$')
    closing_re = re.compile(r'(\s*)};')

    collected = []
    total = len(lines)

    while idx < total:
        text = lines[idx].rstrip()
        idx += 1

        found = field_re.match(text)
        if found is not None:
            ctype, name, dim, comment = found.group(2, 4, 5, 6)
            collected.append({
                "type": ctype,
                "name": name,
                "size": int(dim[1:-1]) if dim else 1,
                "desc": comment[2:].strip() if comment else "",
            })

        # The closing line is consumed too, then scanning stops.
        if closing_re.match(text):
            break

    event_dict['fields'] = collected
    event_dict['num_fields'] = len(collected)

    return idx
-
def parse_enums(lines, idx, event_dict):
    """
    Reads the value lines of a single enum from a proto file.

    Scanning starts at lines[idx] and ends after the first line that starts
    (leading whitespace aside) with "};", or when lines runs out.  The
    opening brace is not validated.  Lines containing "#if" or "#endif" are
    skipped entirely.  Every other line that begins with optional whitespace
    followed by a word character is kept verbatim (right-stripped only).

    The kept lines are stored in event_dict['names'].  Returns the index of
    the line following the last line consumed.
    """
    names = []

    while idx < len(lines):
        text = lines[idx].rstrip()
        idx += 1

        # Preprocessor guards are neither enum values nor terminators.
        if re.search(r'#if|#endif', text):
            continue

        if re.match(r'(\s*)(\w+)(\s*)', text):
            names.append(text)

        if re.match(r'(\s*)};', text):
            break

    event_dict['names'] = names
    return idx
-
def parse_protos(files, verbose=False):
    """
    Parses one or more proto files and returns a dictionary of event and enum definitions.

    files   -- a single path, or a list/tuple of paths.  Files are parsed in
               the order given.
    verbose -- when true, prints the name of each file before parsing it.

    Event ids and enum ids are each assigned sequentially, starting at 1, in
    the order the definitions are encountered across all files.  A
    "///@brief ..." comment (plus any directly following "/// ..." lines)
    becomes the description of the next event or enum definition.
    """

    # Protos structure:
    #
    # {
    #   "events": {
    #     "defs": {     // dict of event definitions where keys are 'group_name::event_name"
    #       ...,
    #       "ApiStat::DrawInfoEvent": {
    #         "id": 3,
    #         "group": "ApiStat",
    #         "name": "DrawInfoEvent",  // name of event without 'group_name::' prefix
    #         "desc": "",
    #         "fields": [
    #           {
    #             "type": "uint32_t",
    #             "name": "drawId",
    #             "size": 1,
    #             "desc": "",
    #           },
    #           ...
    #         ]
    #       },
    #       ...
    #     },
    #     "groups": {   // dict of groups with lists of event keys
    #       "ApiStat": [
    #         "ApiStat::DispatchEvent",
    #         "ApiStat::DrawInfoEvent",
    #         ...
    #       ],
    #       "Framework": [
    #         "Framework::ThreadStartApiEvent",
    #         "Framework::ThreadStartWorkerEvent",
    #         ...
    #       ],
    #       ...
    #     },
    #     "map": {  // map of event ids to match archrast output to event key
    #       "1": "Framework::ThreadStartApiEvent",
    #       "2": "Framework::ThreadStartWorkerEvent",
    #       "3": "ApiStat::DrawInfoEvent",
    #       ...
    #     }
    #   },
    #   "enums": { ... }    // enums follow similar defs, map (groups?) structure
    # }

    protos = {
        'events': {
            'defs': {},     # event dictionary containing events with their fields
            'map': {},      # dictionary to map event ids to event names
            'groups': {}    # event keys stored by groups
        },
        'enums': {
            'defs': {},
            'map': {}
        }
    }

    event_id = 0
    enum_id = 0

    # A lone path is wrapped; lists and tuples of paths are used as they are.
    # (An exact "type(files) is not list" test would wrap a tuple of paths as
    # if it were one path, which then fails inside open().)
    if not isinstance(files, (list, tuple)):
        files = [files]

    for filename in files:
        if verbose:
            print("Parsing proto file: %s" % os.path.normpath(filename))

        # Read everything up front so the file is closed before parsing starts.
        with open(filename, 'r') as f:
            lines = f.readlines()

        in_brief = False
        brief = []
        idx = 0
        while idx < len(lines):
            line = lines[idx].strip()
            idx += 1

            # If currently processing a brief, keep processing or change state
            if in_brief:
                match = re.match(r'^\s*\/\/\/\s*(.*)$', line)                   # i.e. "/// more event desc..."
                if match:
                    brief.append(match.group(1).strip())
                    continue
                else:
                    in_brief = False

            # Match event/enum brief (the misspelling "breif" is accepted too)
            match = re.match(r'^\s*\/\/\/\s*@(brief|breif)\s*(.*)$', line)     # i.e. "///@brief My event desc..."
            if match:
                in_brief = True
                brief.append(match.group(2).strip())
                continue

            # Match event definition
            match = re.match(r'event(\s*)(((\w*)::){0,1}(\w+))', line)         # i.e. "event SWTag::CounterEvent"
            if match:
                event_id += 1

                # Parse event attributes
                event_key = match.group(2)                                      # i.e. SWTag::CounterEvent
                event_group = match.group(4) if match.group(4) else ""          # i.e. SWTag
                event_name = match.group(5)                                     # i.e. CounterEvent

                # Define event attributes
                event = {
                    'id': event_id,
                    'group': event_group,
                    'name': event_name,
                    'desc': ' '.join(brief)
                }
                # Add period at end of event desc if necessary
                if event["desc"] and event["desc"][-1] != '.':
                    event["desc"] += '.'

                # Reset brief
                brief = []

                # Now add event fields (advances idx past the event body)
                idx = parse_event_fields(lines, idx, event)

                # Register event and mapping
                protos['events']['defs'][event_key] = event
                protos['events']['map'][event_id] = event_key

                continue

            # Match enum definition
            match = re.match(r'enum(\s*)(\w+)', line)
            if match:
                enum_id += 1

                # Parse enum attributes
                enum_name = match.group(2)

                # Define enum attr
                enum = {
                    'name': enum_name,
                    'desc': ' '.join(brief)
                }
                # Add period at end of enum desc if necessary
                if enum["desc"] and enum["desc"][-1] != '.':
                    enum["desc"] += '.'

                # Reset brief
                brief = []

                # Now add enum fields (advances idx past the enum body)
                idx = parse_enums(lines, idx, enum)

                # Register enum and mapping
                protos['enums']['defs'][enum_name] = enum
                protos['enums']['map'][enum_id] = enum_name

                continue

    # Sort and group events
    event_groups = protos['events']['groups']
    for key in sorted(protos['events']['defs']):
        group = protos['events']['defs'][key]['group']
        if group not in event_groups:
            event_groups[group] = []
        event_groups[group].append(key)

    return protos
-
-
def main():
    """
    Command-line entry point: renders the ArchRast C++ event sources from proto files.

    Parses --proto / --output-dir / --verbose, verifies that every proto file
    exists, parses them with parse_protos() and renders the four gen_ar_*
    templates from the 'templates' directory next to this script into the
    output directory.  Existing outputs are deleted before regeneration.

    Returns 0 on success and 1 on failure, for use with sys.exit().
    """

    # Parse args...
    parser = ArgumentParser()
    parser.add_argument("--proto", "-p", dest="protos", nargs='+', help="Path to all proto file(s) to process. Accepts one or more paths (i.e. events.proto and events_private.proto)", required=True)
    parser.add_argument("--output-dir", help="Output dir (defaults to ./codegen). Will create folder if it does not exist.", required=False, default="codegen")
    parser.add_argument("--verbose", "-v", help="Verbose", action="store_true")
    args = parser.parse_args()

    if not os.path.exists(args.output_dir):
        # MakeDir is documented to return non-zero on failure; stop here with
        # a clear message instead of failing later while writing a file.
        if MakeDir(args.output_dir):
            print('Error: Could not create output dir %s' % args.output_dir, file=sys.stderr)
            return 1

    for f in args.protos:
        if not os.path.exists(f):
            print('Error: Could not find proto file %s' % f, file=sys.stderr)
            return 1

    # Parse each proto file and add to protos container
    protos = parse_protos(args.protos, args.verbose)

    # [output/template file name, header passed to the template as event_header]
    files = [
        ["gen_ar_event.hpp", ""],
        ["gen_ar_event.cpp", ""],
        ["gen_ar_eventhandler.hpp", "gen_ar_event.hpp"],
        ["gen_ar_eventhandlerfile.hpp", "gen_ar_eventhandler.hpp"]
    ]

    # Templates live next to this script; the location does not change per file.
    template_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'templates')

    rval = 0

    try:
        # Delete existing files
        for filename, _ in files:
            output_fullpath = os.path.join(args.output_dir, filename)
            if os.path.exists(output_fullpath):
                if args.verbose:
                    print("Deleting existing file: %s" % output_fullpath)
                os.remove(output_fullpath)

        # Generate files from templates
        print("Generating c++ from proto files...")
        for filename, event_header in files:
            template_file = os.path.join(template_dir, filename)
            output_fullpath = os.path.join(args.output_dir, filename)

            if args.verbose:
                print("Generating: %s" % output_fullpath)
            MakoTemplateWriter.to_file(template_file, output_fullpath,
                    cmdline=sys.argv,
                    filename=filename,
                    protos=protos,
                    event_header=event_header)

    except Exception as e:
        # Report on stderr, like the other error paths of this function.
        print(e, file=sys.stderr)
        rval = 1

    return rval

if __name__ == '__main__':
    sys.exit(main())
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_backends.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_backends.py
deleted file mode 100644
index eb51a3a8a13..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_backends.py
+++ /dev/null
@@ -1,164 +0,0 @@
-# Copyright (C) 2017-2018 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the 'Software'),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-# Python source
-
-import itertools
-import os
-import sys
-from gen_common import *
-
-
def main(args=sys.argv[1:]):
    """
    Generates the function-table initialization sources for BackendPixelRate
    (or, with --rast, for the rasterizer functions).

    One C++ assignment line is produced for every combination of indices
    described by --dim.  The lines are distributed over one or more .cpp files
    (--cpp); a header (--hpp) and a cmake source list (--cmake) can be
    generated as well.  Everything is rendered into a temporary directory and
    then handed to CopyDirFilesIfDifferent() to update --outdir.

    args -- argument list to parse (defaults to the process command line).

    Returns the result of CopyDirFilesIfDifferent(), or 1 if the output
    directory cannot be created or generation raises an exception.
    """
    thisDir = os.path.dirname(os.path.realpath(__file__))
    parser = ArgumentParser('Generate files and initialization functions for all permutations of BackendPixelRate.')
    parser.add_argument('--dim', help='gBackendPixelRateTable array dimensions', nargs='+', type=int, required=True)
    parser.add_argument('--outdir', help='output directory', nargs='?', type=str, default=thisDir)
    parser.add_argument('--split', help='how many lines of initialization per file [0=no split]', nargs='?', type=int, default=512)
    parser.add_argument('--numfiles', help='how many output files to generate', nargs='?', type=int, default=0)
    parser.add_argument('--cpp', help='Generate cpp file(s)', action='store_true', default=False)
    parser.add_argument('--hpp', help='Generate hpp file', action='store_true', default=False)
    parser.add_argument('--cmake', help='Generate cmake file', action='store_true', default=False)
    parser.add_argument('--rast', help='Generate rasterizer functions instead of normal backend', action='store_true', default=False)

    args = parser.parse_args(args)


    class backendStrs :
        # Names and templates used for generation; --rast swaps in the
        # rasterizer variants (the header template is shared by both modes).
        def __init__(self) :
            self.outFileName = 'gen_BackendPixelRate%s.cpp'
            self.outHeaderName = 'gen_BackendPixelRate.hpp'
            self.functionTableName = 'gBackendPixelRateTable'
            self.funcInstanceHeader = ' = BackendPixelRate<SwrBackendTraits<'
            self.template = 'gen_backend.cpp'
            self.hpp_template = 'gen_header_init.hpp'
            self.cmakeFileName = 'gen_backends.cmake'
            self.cmakeSrcVar = 'GEN_BACKEND_SOURCES'
            self.tableName = 'BackendPixelRate'

            if args.rast:
                self.outFileName = 'gen_rasterizer%s.cpp'
                self.outHeaderName = 'gen_rasterizer.hpp'
                self.functionTableName = 'gRasterizerFuncs'
                self.funcInstanceHeader = ' = RasterizeTriangle<RasterizerTraits<'
                self.template = 'gen_rasterizer.cpp'
                self.cmakeFileName = 'gen_rasterizer.cmake'
                self.cmakeSrcVar = 'GEN_RASTERIZER_SOURCES'
                self.tableName = 'RasterizerFuncs'


    backend = backendStrs()

    # generate all permutations possible for template parameter inputs
    index_ranges = [range(x) for x in args.dim]

    output_list = []
    for combination in itertools.product(*index_ranges):
        tempStr = backend.functionTableName
        # print each template param as an index in the multidimensional array
        for i in combination:
            tempStr += '[' + str(i) + ']'
        # the same values, comma separated, form the template instantiation string
        tempStr += backend.funcInstanceHeader + ','.join(map(str, combination)) + '>>;'
        # append the line of c++ code in the list of output lines
        output_list.append(tempStr)

    # how many files should we split the global template initialization into?
    if (args.split == 0):
        numFiles = 1
    else:
        numFiles = (len(output_list) + args.split - 1) // args.split
    if (args.numfiles != 0):
        numFiles = args.numfiles
    # Always produce at least one file, and never use a zero chunk size: both
    # would otherwise crash below (division by zero / range() with step 0)
    # when there are no lines to emit.
    numFiles = max(numFiles, 1)
    linesPerFile = max((len(output_list) + numFiles - 1) // numFiles, 1)
    chunkedList = [output_list[x:x+linesPerFile] for x in range(0, len(output_list), linesPerFile)]
    # When more files are requested than there are chunks, the surplus files
    # receive an empty list so that all numFiles sources referenced by the
    # header and cmake outputs exist.
    # NOTE(review): assumes the cpp templates accept an empty funcList - confirm.
    chunkedList += [[] for _ in range(numFiles - len(chunkedList))]

    # Create the output directory before the temporary one, so a failure here
    # does not leave a temporary directory behind.
    try:
        os.makedirs(args.outdir, exist_ok=True)
    except OSError:
        print('ERROR: Could not create directory:', args.outdir, file=sys.stderr)
        return 1

    tmp_output_dir = MakeTmpDir('_codegen')

    # Needed by both the --cpp and the --cmake steps.
    baseCppName = os.path.join(tmp_output_dir, backend.outFileName)

    rval = 0

    # generate .cpp files
    try:
        if args.cpp:
            templateCpp = os.path.join(thisDir, 'templates', backend.template)

            for fileNum in range(numFiles):
                filename = baseCppName % str(fileNum)
                MakoTemplateWriter.to_file(
                    templateCpp,
                    filename,
                    cmdline=sys.argv,
                    fileNum=fileNum,
                    funcList=chunkedList[fileNum])

        if args.hpp:
            baseHppName = os.path.join(tmp_output_dir, backend.outHeaderName)
            templateHpp = os.path.join(thisDir, 'templates', backend.hpp_template)

            MakoTemplateWriter.to_file(
                templateHpp,
                baseHppName,
                cmdline=sys.argv,
                numFiles=numFiles,
                filename=backend.outHeaderName,
                tableName=backend.tableName)

        # generate gen_backend.cmake file
        if args.cmake:
            templateCmake = os.path.join(thisDir, 'templates', 'gen_backend.cmake')
            cmakeFile = os.path.join(tmp_output_dir, backend.cmakeFileName)

            MakoTemplateWriter.to_file(
                templateCmake,
                cmakeFile,
                cmdline=sys.argv,
                srcVar=backend.cmakeSrcVar,
                numFiles=numFiles,
                baseCppName='${RASTY_GEN_SRC_DIR}/backends/' + os.path.basename(baseCppName))

        rval = CopyDirFilesIfDifferent(tmp_output_dir, args.outdir)

    except Exception as e:
        # Say what went wrong instead of failing silently; KeyboardInterrupt
        # and SystemExit are deliberately not caught.
        print('ERROR: Code generation failed:', e, file=sys.stderr)
        rval = 1

    finally:
        DeleteDirTree(tmp_output_dir)

    return rval

if __name__ == '__main__':
    sys.exit(main())
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_common.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_common.py
deleted file mode 100644
index c1d08fb83bc..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_common.py
+++ /dev/null
@@ -1,291 +0,0 @@
-# Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-# Python source
-import os
-import errno
-import sys
-import argparse
-import tempfile
-import filecmp
-import shutil
-from mako.template import Template
-from mako.exceptions import RichTraceback
-
-#==============================================================================
-def ConcatLists(list_of_lists):
- output = []
- for l in list_of_lists: output += l
- return output
-
-#==============================================================================
-def MakeTmpDir(suffix=''):
- '''
- Create temporary directory for use in codegen scripts.
- '''
- return tempfile.mkdtemp(suffix)
-
-#==============================================================================
-def MakeDir(dir_path):
- '''
- Create a directory if it doesn't exist
-
- returns 0 on success, non-zero on failure
- '''
- dir_path = os.path.abspath(dir_path)
-
- if not os.path.exists(dir_path):
- try:
- os.makedirs(dir_path)
- except OSError as err:
- if err.errno != errno.EEXIST:
- return 1
- else:
- if not os.path.isdir(dir_path):
- return 1
-
- return 0
-
-#==============================================================================
-def DeleteDirTree(dir_path):
- '''
- Delete directory tree.
-
- returns 0 on success, non-zero on failure
- '''
- rval = 0
- try:
- shutil.rmtree(dir_path, False)
- except:
- rval = 1
- return rval
-
-#==============================================================================
-def CopyFileIfDifferent(src, dst, verbose = False):
- '''
- Copy <src> file to <dst> file if the <dst>
- file either doesn't contain the file or the file
- contents are different.
-
- returns 0 on success, non-zero on failure
- '''
-
- assert os.path.isfile(src)
- assert (False == os.path.exists(dst) or os.path.isfile(dst))
-
- need_copy = not os.path.exists(dst)
- if not need_copy:
- need_copy = not filecmp.cmp(src, dst)
-
- if need_copy:
- try:
- shutil.copy2(src, dst)
- except:
- print('ERROR: Could not copy %s to %s' % (src, dst), file=sys.stderr)
- return 1
-
- if verbose:
- print(src, '-->', dst)
-
- return 0
-
-#==============================================================================
-def CopyDirFilesIfDifferent(src, dst, recurse = True, verbose = False, orig_dst = None):
- '''
- Copy files <src> directory to <dst> directory if the <dst>
- directory either doesn't contain the file or the file
- contents are different.
-
- Optionally recurses into subdirectories
-
- returns 0 on success, non-zero on failure
- '''
-
- assert os.path.isdir(src)
- assert os.path.isdir(dst)
-
- src = os.path.abspath(src)
- dst = os.path.abspath(dst)
-
- if not orig_dst:
- orig_dst = dst
-
- for f in os.listdir(src):
- src_path = os.path.join(src, f)
- dst_path = os.path.join(dst, f)
-
- # prevent recursion
- if src_path == orig_dst:
- continue
-
- if os.path.isdir(src_path):
- if recurse:
- if MakeDir(dst_path):
- print('ERROR: Could not create directory:', dst_path, file=sys.stderr)
- return 1
-
- if verbose:
- print('mkdir', dst_path)
- rval = CopyDirFilesIfDifferent(src_path, dst_path, recurse, verbose, orig_dst)
- else:
- rval = CopyFileIfDifferent(src_path, dst_path, verbose)
-
- if rval:
- return rval
-
- return 0
-
-#==============================================================================
-class MakoTemplateWriter:
- '''
- MakoTemplateWriter - Class (namespace) for functions to generate strings
- or files using the Mako template module.
-
- See http://docs.makotemplates.org/en/latest/ for
- mako documentation.
- '''
-
- @staticmethod
- def to_string(template_filename, **kwargs):
- '''
- Write template data to a string object and return the string
- '''
- from mako.template import Template
- from mako.exceptions import RichTraceback
-
- try:
- template = Template(filename=template_filename)
- # Split + Join fixes line-endings for whatever platform you are using
- return '\n'.join(template.render(**kwargs).splitlines())
- except:
- traceback = RichTraceback()
- for (filename, lineno, function, line) in traceback.traceback:
- print('File %s, line %s, in %s' % (filename, lineno, function))
- print(line, '\n')
- print('%s: %s' % (str(traceback.error.__class__.__name__), traceback.error))
- raise
-
- @staticmethod
- def to_file(template_filename, output_filename, **kwargs):
- '''
- Write template data to a file
- '''
- if MakeDir(os.path.dirname(output_filename)):
- return 1
- with open(output_filename, 'w') as outfile:
- print(MakoTemplateWriter.to_string(template_filename, **kwargs), file=outfile)
- return 0
-
-
-#==============================================================================
-class ArgumentParser(argparse.ArgumentParser):
- '''
- Subclass of argparse.ArgumentParser
-
- Allow parsing from command files that start with @
- Example:
- >bt run @myargs.txt
-
- Contents of myargs.txt:
- -m <machine>
- --target cdv_win7
-
- The below function allows multiple args to be placed on the same text-file line.
- The default is one token per line, which is a little cumbersome.
-
- Also allow all characters after a '#' character to be ignored.
- '''
-
- #==============================================================================
- class _HelpFormatter(argparse.RawTextHelpFormatter):
- ''' Better help formatter for argument parser '''
-
- def _split_lines(self, text, width):
- ''' optimized split lines algorithm, indents split lines '''
- lines = text.splitlines()
- out_lines = []
- if len(lines):
- out_lines.append(lines[0])
- for line in lines[1:]:
- out_lines.append(' ' + line)
- return out_lines
-
- #==============================================================================
- def __init__(self, *args, **kwargs):
- ''' Constructor. Compatible with argparse.ArgumentParser(),
- but with some modifications for better usage and help display.
- '''
- super(ArgumentParser, self).__init__(
- *args,
- fromfile_prefix_chars='@',
- formatter_class=ArgumentParser._HelpFormatter,
- **kwargs)
-
- #==========================================================================
- def convert_arg_line_to_args(self, arg_line):
- ''' convert one line of parsed file to arguments '''
- arg_line = arg_line.split('#', 1)[0]
- if sys.platform == 'win32':
- arg_line = arg_line.replace('\\', '\\\\')
- for arg in shlex.split(arg_line):
- if not arg.strip():
- continue
- yield arg
-
- #==========================================================================
- def _read_args_from_files(self, arg_strings):
- ''' read arguments from files '''
- # expand arguments referencing files
- new_arg_strings = []
- for arg_string in arg_strings:
-
- # for regular arguments, just add them back into the list
- if arg_string[0] not in self.fromfile_prefix_chars:
- new_arg_strings.append(arg_string)
-
- # replace arguments referencing files with the file content
- else:
- filename = arg_string[1:]
-
- # Search in sys.path
- if not os.path.exists(filename):
- for path in sys.path:
- filename = os.path.join(path, arg_string[1:])
- if os.path.exists(filename):
- break
-
- try:
- args_file = open(filename)
- try:
- arg_strings = []
- for arg_line in args_file.read().splitlines():
- for arg in self.convert_arg_line_to_args(arg_line):
- arg_strings.append(arg)
- arg_strings = self._read_args_from_files(arg_strings)
- new_arg_strings.extend(arg_strings)
- finally:
- args_file.close()
- except IOError:
- err = sys.exc_info()[1]
- self.error(str(err))
-
- # return the modified argument list
- return new_arg_strings
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.py
deleted file mode 100644
index bd39ef645f7..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_knobs.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-# Python source
-import os
-import sys
-import knob_defs
-from gen_common import *
-
-def main(args=sys.argv[1:]):
-
- # parse args
- parser = ArgumentParser()
- parser.add_argument("--output", "-o", help="Path to output file", required=True)
- parser.add_argument("--gen_h", "-gen_h", help="Generate gen_knobs.h", action="store_true", default=False)
- parser.add_argument("--gen_cpp", "-gen_cpp", help="Generate gen_knobs.cpp", action="store_true", required=False)
-
- args = parser.parse_args()
-
- cur_dir = os.path.dirname(os.path.abspath(__file__))
- template_cpp = os.path.join(cur_dir, 'templates', 'gen_knobs.cpp')
- template_h = os.path.join(cur_dir, 'templates', 'gen_knobs.h')
-
- output_filename = os.path.basename(args.output)
- output_dir = MakeTmpDir('_codegen')
-
- output_file = os.path.join(output_dir, output_filename)
-
- rval = 0
-
- try:
- if args.gen_h:
- MakoTemplateWriter.to_file(
- template_h,
- output_file,
- cmdline=sys.argv,
- filename='gen_knobs',
- knobs=knob_defs.KNOBS)
-
- if args.gen_cpp:
- MakoTemplateWriter.to_file(
- template_cpp,
- output_file,
- cmdline=sys.argv,
- filename='gen_knobs',
- knobs=knob_defs.KNOBS,
- includes=['core/knobs_init.h', 'common/os.h', 'sstream', 'iomanip'])
-
- rval = CopyFileIfDifferent(output_file, args.output)
-
- except:
- rval = 1
-
- finally:
- # ignore errors from delete of tmp directory
- DeleteDirTree(output_dir)
-
- return 0
-
-if __name__ == '__main__':
- sys.exit(main())
-
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
deleted file mode 100644
index f3ab7120a43..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ /dev/null
@@ -1,362 +0,0 @@
-# Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-import os, sys, re
-from gen_common import *
-from argparse import FileType
-
-inst_aliases = {
- 'SHUFFLE_VECTOR': 'VSHUFFLE',
- 'INSERT_ELEMENT': 'VINSERT',
- 'EXTRACT_ELEMENT': 'VEXTRACT',
- 'MEM_SET': 'MEMSET',
- 'MEM_CPY': 'MEMCOPY',
- 'MEM_MOVE': 'MEMMOVE',
- 'L_SHR': 'LSHR',
- 'A_SHR': 'ASHR',
- 'BIT_CAST': 'BITCAST',
- 'U_DIV': 'UDIV',
- 'S_DIV': 'SDIV',
- 'U_REM': 'UREM',
- 'S_REM': 'SREM',
- 'BIN_OP': 'BINOP',
-}
-
-intrinsics = [
- ['VGATHERPD', ['src', 'pBase', 'indices', 'mask', 'scale'], 'src'],
- ['VGATHERPS', ['src', 'pBase', 'indices', 'mask', 'scale'], 'src'],
- ['VGATHERDD', ['src', 'pBase', 'indices', 'mask', 'scale'], 'src'],
- ['VSCATTERPS', ['pBase', 'mask', 'indices', 'src', 'scale'], 'src'],
- ['VRCPPS', ['a'], 'a'],
- ['VROUND', ['a', 'rounding'], 'a'],
- ['BEXTR_32', ['src', 'control'], 'src'],
- ['VPSHUFB', ['a', 'b'], 'a'],
- ['VPERMD', ['a', 'idx'], 'a'],
- ['VPERMPS', ['idx', 'a'], 'a'],
- ['VCVTPD2PS', ['a'], 'getVectorType(mFP32Ty, VEC_GET_NUM_ELEMS)'],
- ['VCVTPS2PH', ['a', 'round'], 'mSimdInt16Ty'],
- ['VHSUBPS', ['a', 'b'], 'a'],
- ['VPTESTC', ['a', 'b'], 'mInt32Ty'],
- ['VPTESTZ', ['a', 'b'], 'mInt32Ty'],
- ['VPHADDD', ['a', 'b'], 'a'],
- ['PDEP32', ['a', 'b'], 'a'],
- ['RDTSC', [], 'mInt64Ty'],
-]
-
-llvm_intrinsics = [
- ['CTTZ', 'cttz', ['a', 'flag'], ['a']],
- ['CTLZ', 'ctlz', ['a', 'flag'], ['a']],
- ['VSQRTPS', 'sqrt', ['a'], ['a']],
- ['STACKSAVE', 'stacksave', [], []],
- ['STACKRESTORE', 'stackrestore', ['a'], []],
- ['VMINPS', 'minnum', ['a', 'b'], ['a']],
- ['VMAXPS', 'maxnum', ['a', 'b'], ['a']],
- ['VFMADDPS', 'fmuladd', ['a', 'b', 'c'], ['a']],
- ['DEBUGTRAP', 'debugtrap', [], []],
- ['POPCNT', 'ctpop', ['a'], ['a']],
- ['LOG2', 'log2', ['a'], ['a']],
- ['FABS', 'fabs', ['a'], ['a']],
- ['EXP2', 'exp2', ['a'], ['a']],
- ['COS', 'cos', ['a'], ['a']],
- ['SIN', 'sin', ['a'], ['a']],
- ['FLOOR', 'floor', ['a'], ['a']],
- ['POW', 'pow', ['a', 'b'], ['a']]
-]
-
-this_dir = os.path.dirname(os.path.abspath(__file__))
-template = os.path.join(this_dir, 'templates', 'gen_builder.hpp')
-
-def convert_uppercamel(name):
- s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
- return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).upper()
-
-'''
- Given an input file (e.g. IRBuilder.h) generates function dictionary.
-'''
-def parse_ir_builder(input_file):
-
- functions = []
-
- lines = input_file.readlines()
- deprecated = None
-
- idx = 0
- while idx < len(lines) - 1:
- line = lines[idx].rstrip()
- idx += 1
-
- if deprecated is None:
- deprecated = re.search(r'LLVM_ATTRIBUTE_DEPRECATED', line)
-
- #match = re.search(r'\*Create', line)
- match = re.search(r'[\*\s]Create(\w*)\(', line)
- if match is not None:
- #print('Line: %s' % match.group(1))
-
- # Skip function if LLVM_ATTRIBUTE_DEPRECATED found before
- if deprecated is not None:
- deprecated = None
- continue
-
- if re.search(r'^\s*Create', line) is not None:
- func_sig = lines[idx-2].rstrip() + line
- else:
- func_sig = line
-
- end_of_args = False
- while not end_of_args:
- end_paren = re.search(r'\)', line)
- if end_paren is not None:
- end_of_args = True
- else:
- line = lines[idx].rstrip()
- func_sig += line
- idx += 1
-
- delfunc = re.search(r'LLVM_DELETED_FUNCTION|= delete;', func_sig)
-
- if not delfunc:
- func = re.search(r'(.*?)\*[\n\s]*(Create\w*)\((.*?)\)', func_sig)
- if func is not None:
-
- return_type = func.group(1).strip() + '*'
- func_name = func.group(2)
- arguments = func.group(3)
-
- func_args = []
- arg_names = []
- args = arguments.split(',')
- for arg in args:
- arg = arg.strip()
- if arg:
- func_args.append(arg)
-
- split_args = arg.split('=')
- arg_name = split_args[0].rsplit(None, 1)[-1]
-
- reg_arg = re.search(r'[\&\*]*(\w*)', arg_name)
- if reg_arg:
- arg_names += [reg_arg.group(1)]
-
- ignore = False
-
- # The following functions need to be ignored in openswr.
- # API change in llvm-5.0 breaks baked autogen files
- if (
- (func_name == 'CreateFence' or
- func_name == 'CreateAtomicCmpXchg' or
- func_name == 'CreateAtomicRMW')):
- ignore = True
-
- # The following functions need to be ignored.
- if (func_name == 'CreateInsertNUWNSWBinOp' or
- func_name == 'CreateMaskedIntrinsic' or
- func_name == 'CreateAlignmentAssumptionHelper' or
- func_name == 'CreateGEP' or
- func_name == 'CreateLoad' or
- func_name == 'CreateMaskedLoad' or
- func_name == 'CreateStore' or
- func_name == 'CreateMaskedStore' or
- func_name == 'CreateFCmpHelper' or
- func_name == 'CreateElementUnorderedAtomicMemCpy'):
- ignore = True
-
- # Convert CamelCase to CAMEL_CASE
- func_mod = re.search(r'Create(\w*)', func_name)
- if func_mod:
- func_mod = func_mod.group(1)
- func_mod = convert_uppercamel(func_mod)
- if func_mod[0:2] == 'F_' or func_mod[0:2] == 'I_':
- func_mod = func_mod[0] + func_mod[2:]
-
- # Substitute alias based on CAMEL_CASE name.
- func_alias = inst_aliases.get(func_mod)
- if not func_alias:
- func_alias = func_mod
-
- if func_name == 'CreateCall' or func_name == 'CreateGEP':
- arglist = re.search(r'ArrayRef', ', '.join(func_args))
- if arglist:
- func_alias = func_alias + 'A'
-
- if not ignore:
- functions.append({
- 'name' : func_name,
- 'alias' : func_alias,
- 'return' : return_type,
- 'args' : ', '.join(func_args),
- 'arg_names' : arg_names,
- })
-
- return functions
-
-'''
- Auto-generates macros for LLVM IR
-'''
-def generate_gen_h(functions, output_dir):
- filename = 'gen_builder.hpp'
- output_filename = os.path.join(output_dir, filename)
-
- templfuncs = []
- for func in functions:
- decl = '%s %s(%s)' % (func['return'], func['alias'], func['args'])
-
- templfuncs.append({
- 'decl' : decl,
- 'intrin' : func['name'],
- 'args' : func['arg_names'],
- })
-
- MakoTemplateWriter.to_file(
- template,
- output_filename,
- cmdline=sys.argv,
- comment='Builder IR Wrappers',
- filename=filename,
- functions=templfuncs,
- isX86=False, isIntrin=False)
-
-'''
- Auto-generates macros for LLVM IR
-'''
-def generate_meta_h(output_dir):
- filename = 'gen_builder_meta.hpp'
- output_filename = os.path.join(output_dir, filename)
-
- functions = []
- for inst in intrinsics:
- name = inst[0]
- args = inst[1]
- ret = inst[2]
-
- #print('Inst: %s, x86: %s numArgs: %d' % (inst[0], inst[1], len(inst[2])))
- if len(args) != 0:
- declargs = 'Value* ' + ', Value* '.join(args)
- decl = 'Value* %s(%s, const llvm::Twine& name = "")' % (name, declargs)
- else:
- decl = 'Value* %s(const llvm::Twine& name = "")' % (name)
-
- # determine the return type of the intrinsic. It can either be:
- # - type of one of the input arguments
- # - snippet of code to set the return type
-
- if ret in args:
- returnTy = ret + '->getType()'
- else:
- returnTy = ret
-
- functions.append({
- 'decl' : decl,
- 'name' : name,
- 'args' : args,
- 'returnType': returnTy
- })
-
- MakoTemplateWriter.to_file(
- template,
- output_filename,
- cmdline=sys.argv,
- comment='meta intrinsics',
- filename=filename,
- functions=functions,
- isX86=True, isIntrin=False)
-
-def generate_intrin_h(output_dir):
- filename = 'gen_builder_intrin.hpp'
- output_filename = os.path.join(output_dir, filename)
-
- functions = []
- for inst in llvm_intrinsics:
- #print('Inst: %s, x86: %s numArgs: %d' % (inst[0], inst[1], len(inst[2])))
- if len(inst[2]) != 0:
- declargs = 'Value* ' + ', Value* '.join(inst[2])
- decl = 'Value* %s(%s, const llvm::Twine& name = "")' % (inst[0], declargs)
- else:
- decl = 'Value* %s(const llvm::Twine& name = "")' % (inst[0])
-
- functions.append({
- 'decl' : decl,
- 'intrin' : inst[1],
- 'args' : inst[2],
- 'types' : inst[3],
- })
-
- MakoTemplateWriter.to_file(
- template,
- output_filename,
- cmdline=sys.argv,
- comment='llvm intrinsics',
- filename=filename,
- functions=functions,
- isX86=False, isIntrin=True)
-'''
- Function which is invoked when this script is started from a command line.
- Will present and consume a set of arguments which will tell this script how
- to behave
-'''
-def main():
-
- # Parse args...
- parser = ArgumentParser()
- parser.add_argument('--input', '-i', type=FileType('r'), help='Path to IRBuilder.h', required=False)
- parser.add_argument('--output-dir', '-o', action='store', dest='output', help='Path to output directory', required=True)
- parser.add_argument('--gen_h', help='Generate builder_gen.h', action='store_true', default=False)
- parser.add_argument('--gen_meta_h', help='Generate meta intrinsics. No input is needed.', action='store_true', default=False)
- parser.add_argument('--gen_intrin_h', help='Generate llvm intrinsics. No input is needed.', action='store_true', default=False)
- args = parser.parse_args()
-
- if not os.path.exists(args.output):
- os.makedirs(args.output)
-
- final_output_dir = args.output
- args.output = MakeTmpDir('_codegen')
-
- rval = 0
- try:
- if args.input:
- functions = parse_ir_builder(args.input)
-
- if args.gen_h:
- generate_gen_h(functions, args.output)
-
- elif args.gen_h:
- print('Need to specify --input for --gen_h!')
-
- if args.gen_meta_h:
- generate_meta_h(args.output)
-
- if args.gen_intrin_h:
- generate_intrin_h(args.output)
-
- rval = CopyDirFilesIfDifferent(args.output, final_output_dir)
-
- except:
- print('ERROR: Could not generate llvm_ir_macros', file=sys.stderr)
- rval = 1
-
- finally:
- DeleteDirTree(args.output)
-
- return rval
-
-if __name__ == '__main__':
- sys.exit(main())
-# END OF FILE
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py
deleted file mode 100644
index 4739f2078d6..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py
+++ /dev/null
@@ -1,360 +0,0 @@
-# Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-import os, sys, re
-from gen_common import *
-from argparse import FileType
-
-'''
-'''
-def gen_llvm_type(type, name, idx, is_pointer, is_pointer_pointer, is_array, is_array_array, array_count, array_count1, is_llvm_struct, is_llvm_enum, is_llvm_pfn, output_file):
-
- llvm_type = ''
-
- if is_llvm_struct:
- if is_pointer or is_pointer_pointer:
- llvm_type = 'Type::getInt32Ty(ctx)'
- else:
- llvm_type = 'ArrayType::get(Type::getInt8Ty(ctx), sizeof(%s))' % type
- elif is_llvm_enum:
- llvm_type = 'Type::getInt32Ty(ctx)'
- elif is_llvm_pfn:
- llvm_type = 'PointerType::get(Type::getInt8Ty(ctx), 0)'
- else:
- if type == 'BYTE' or type == 'char' or type == 'uint8_t' or type == 'int8_t' or type == 'bool':
- llvm_type = 'Type::getInt8Ty(ctx)'
- elif type == 'UINT64' or type == 'INT64' or type == 'uint64_t' or type == 'int64_t' or type == 'gfxptr_t':
- llvm_type = 'Type::getInt64Ty(ctx)'
- elif type == 'UINT16' or type == 'int16_t' or type == 'uint16_t':
- llvm_type = 'Type::getInt16Ty(ctx)'
- elif type == 'UINT' or type == 'INT' or type == 'int' or type == 'BOOL' or type == 'uint32_t' or type == 'int32_t':
- llvm_type = 'Type::getInt32Ty(ctx)'
- elif type == 'float' or type == 'FLOAT':
- llvm_type = 'Type::getFloatTy(ctx)'
- elif type == 'double' or type == 'DOUBLE':
- llvm_type = 'Type::getDoubleTy(ctx)'
- elif type == 'void' or type == 'VOID':
- llvm_type = 'Type::getInt32Ty(ctx)'
- elif type == 'HANDLE':
- llvm_type = 'PointerType::get(Type::getInt32Ty(ctx), 0)'
- elif type == 'simdscalar':
- llvm_type = 'getVectorType(Type::getFloatTy(ctx), pJitMgr->mVWidth)'
- elif type == 'simdscalari':
- llvm_type = 'getVectorType(Type::getInt32Ty(ctx), pJitMgr->mVWidth)'
- elif type == 'simd16scalar':
- llvm_type = 'getVectorType(Type::getFloatTy(ctx), 16)'
- elif type == 'simd16scalari':
- llvm_type = 'getVectorType(Type::getInt32Ty(ctx), 16)'
- elif type == '__m128i':
- llvm_type = 'getVectorType(Type::getInt32Ty(ctx), 4)'
- elif type == 'SIMD256::Float':
- llvm_type = 'getVectorType(Type::getFloatTy(ctx), 8)'
- elif type == 'SIMD256::Integer':
- llvm_type = 'getVectorType(Type::getInt32Ty(ctx), 8)'
- elif type == 'SIMD512::Float':
- llvm_type = 'getVectorType(Type::getFloatTy(ctx), 16)'
- elif type == 'SIMD512::Integer':
- llvm_type = 'getVectorType(Type::getInt32Ty(ctx), 16)'
- elif type == 'simdvector':
- llvm_type = 'ArrayType::get(getVectorType(Type::getFloatTy(ctx), 8), 4)'
- elif type == 'simd16vector':
- llvm_type = 'ArrayType::get(getVectorType(Type::getFloatTy(ctx), 16), 4)'
- elif type == 'SIMD256::Vec4':
- llvm_type = 'ArrayType::get(getVectorType(Type::getFloatTy(ctx), 8), 4)'
- elif type == 'SIMD512::Vec4':
- llvm_type = 'ArrayType::get(getVectorType(Type::getFloatTy(ctx), 16), 4)'
- else:
- llvm_type = 'Gen_%s(pJitMgr)' % type
-
- if is_pointer:
- llvm_type = 'PointerType::get(%s, 0)' % llvm_type
-
- if is_pointer_pointer:
- llvm_type = 'PointerType::get(%s, 0)' % llvm_type
-
- if is_array_array:
- llvm_type = 'ArrayType::get(ArrayType::get(%s, %s), %s)' % (llvm_type, array_count1, array_count)
- elif is_array:
- llvm_type = 'ArrayType::get(%s, %s)' % (llvm_type, array_count)
-
- return {
- 'name' : name,
- 'lineNum' : idx,
- 'type' : llvm_type,
- }
-
-'''
-'''
-def gen_llvm_types(input_file, output_file):
-
- lines = input_file.readlines()
-
- types = []
-
- for idx in range(len(lines)):
- line = lines[idx].rstrip()
-
- if 'gen_llvm_types FINI' in line:
- break
-
- match = re.match(r'(\s*)struct(\s*)(\w+)', line)
- if match:
- llvm_args = []
-
- # Detect start of structure
- is_fwd_decl = re.search(r';', line)
-
- if not is_fwd_decl:
-
- # Extract the command name
- struct_name = match.group(3).strip()
-
- type_entry = {
- 'name' : struct_name,
- 'lineNum' : idx+1,
- 'members' : [],
- }
-
- end_of_struct = False
-
- while not end_of_struct and idx < len(lines)-1:
- idx += 1
- line = lines[idx].rstrip()
-
- is_llvm_typedef = re.search(r'@llvm_typedef', line)
- if is_llvm_typedef is not None:
- is_llvm_typedef = True
- continue
- else:
- is_llvm_typedef = False
-
- ###########################################
- # Is field a llvm struct? Tells script to treat type as array of bytes that is size of structure.
- is_llvm_struct = re.search(r'@llvm_struct', line)
-
- if is_llvm_struct is not None:
- is_llvm_struct = True
- else:
- is_llvm_struct = False
-
- ###########################################
- # Is field the start of a function? Tells script to ignore it
- is_llvm_func_start = re.search(r'@llvm_func_start', line)
-
- if is_llvm_func_start is not None:
- while not end_of_struct and idx < len(lines)-1:
- idx += 1
- line = lines[idx].rstrip()
- is_llvm_func_end = re.search(r'@llvm_func_end', line)
- if is_llvm_func_end is not None:
- break;
- continue
-
- ###########################################
- # Is field a function? Tells script to ignore it
- is_llvm_func = re.search(r'@llvm_func', line)
-
- if is_llvm_func is not None:
- continue
-
- ###########################################
- # Is field a llvm enum? Tells script to treat type as an enum and replaced with uint32 type.
- is_llvm_enum = re.search(r'@llvm_enum', line)
-
- if is_llvm_enum is not None:
- is_llvm_enum = True
- else:
- is_llvm_enum = False
-
- ###########################################
- # Is field a llvm function pointer? Tells script to treat type as an enum and replaced with uint32 type.
- is_llvm_pfn = re.search(r'@llvm_pfn', line)
-
- if is_llvm_pfn is not None:
- is_llvm_pfn = True
- else:
- is_llvm_pfn = False
-
- ###########################################
- # Is field const?
- is_const = re.search(r'\s+const\s+', line)
-
- if is_const is not None:
- is_const = True
- else:
- is_const = False
-
- ###########################################
- # Is field a pointer?
- is_pointer_pointer = re.search('\*\*', line)
-
- if is_pointer_pointer is not None:
- is_pointer_pointer = True
- else:
- is_pointer_pointer = False
-
- ###########################################
- # Is field a pointer?
- is_pointer = re.search('\*', line)
-
- if is_pointer is not None:
- is_pointer = True
- else:
- is_pointer = False
-
- ###########################################
- # Is field an array of arrays?
- # TODO: Can add this to a list.
- is_array_array = re.search('\[(\w*)\]\[(\w*)\]', line)
- array_count = '0'
- array_count1 = '0'
-
- if is_array_array is not None:
- array_count = is_array_array.group(1)
- array_count1 = is_array_array.group(2)
- is_array_array = True
- else:
- is_array_array = False
-
- ###########################################
- # Is field an array?
- is_array = re.search('\[(\w*)\]', line)
-
- if is_array is not None:
- array_count = is_array.group(1)
- is_array = True
- else:
- is_array = False
-
- is_scoped = re.search('::', line)
-
- if is_scoped is not None:
- is_scoped = True
- else:
- is_scoped = False
-
- type = None
- name = None
- if is_const and is_pointer:
-
- if is_scoped:
- field_match = re.match(r'(\s*)(\w+\<*\w*\>*)(\s+)(\w+::)(\w+)(\s*\**\s*)(\w+)', line)
-
- type = '%s%s' % (field_match.group(4), field_match.group(5))
- name = field_match.group(7)
- else:
- field_match = re.match(r'(\s*)(\w+\<*\w*\>*)(\s+)(\w+)(\s*\**\s*)(\w+)', line)
-
- type = field_match.group(4)
- name = field_match.group(6)
-
- elif is_pointer:
- field_match = re.match(r'(\s*)(\s+)(\w+\<*\w*\>*)(\s*\**\s*)(\w+)', line)
-
- if field_match:
- type = field_match.group(3)
- name = field_match.group(5)
- elif is_const:
- field_match = re.match(r'(\s*)(\w+\<*\w*\>*)(\s+)(\w+)(\s*)(\w+)', line)
-
- if field_match:
- type = field_match.group(4)
- name = field_match.group(6)
- else:
- if is_scoped:
- field_match = re.match(r'\s*(\w+\<*\w*\>*)\s*::\s*(\w+\<*\w*\>*)\s+(\w+)', line)
-
- if field_match:
- type = field_match.group(1) + '::' + field_match.group(2)
- name = field_match.group(3)
- else:
- field_match = re.match(r'(\s*)(\w+\<*\w*\>*)(\s+)(\w+)', line)
-
- if field_match:
- type = field_match.group(2)
- name = field_match.group(4)
-
- if is_llvm_typedef is False:
- if type is not None:
- type_entry['members'].append(
- gen_llvm_type(
- type, name, idx+1, is_pointer, is_pointer_pointer, is_array, is_array_array,
- array_count, array_count1, is_llvm_struct, is_llvm_enum, is_llvm_pfn, output_file))
-
- # Detect end of structure
- end_of_struct = re.match(r'(\s*)};', line)
-
- if end_of_struct:
- types.append(type_entry)
-
- cur_dir = os.path.dirname(os.path.abspath(__file__))
- template = os.path.join(cur_dir, 'templates', 'gen_llvm.hpp')
-
- MakoTemplateWriter.to_file(
- template,
- output_file,
- cmdline=sys.argv,
- filename=os.path.basename(output_file),
- types=types,
- input_dir=os.path.dirname(input_file.name),
- input_file=os.path.basename(input_file.name))
-
-'''
- Function which is invoked when this script is started from a command line.
- Will present and consume a set of arguments which will tell this script how
- to behave
-'''
-def main():
-
- # Parse args...
- parser = ArgumentParser()
- parser.add_argument('--input', '-i', type=FileType('r'),
- help='Path to input file containing structs', required=True)
- parser.add_argument('--output', '-o', action='store',
- help='Path to output file', required=True)
- args = parser.parse_args()
-
- final_output_dir = os.path.dirname(args.output)
- if MakeDir(final_output_dir):
- return 1
-
- final_output_file = args.output
-
- tmp_dir = MakeTmpDir('_codegen')
- args.output = os.path.join(tmp_dir, os.path.basename(args.output))
-
- rval = 0
- try:
- gen_llvm_types(args.input, args.output)
-
- rval = CopyFileIfDifferent(args.output, final_output_file)
- except:
- print('ERROR: Could not generate llvm types', file=sys.stderr)
- rval = 1
-
- finally:
- DeleteDirTree(tmp_dir)
-
- return rval
-
-if __name__ == '__main__':
- sys.exit(main())
-# END OF FILE
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py
deleted file mode 100644
index 75eae353ae1..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py
+++ /dev/null
@@ -1,383 +0,0 @@
-# Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-import sys
-
-# Python source
-KNOBS = [
-
- ['ENABLE_ASSERT_DIALOGS', {
- 'type' : 'bool',
- 'default' : 'true',
- 'desc' : ['Use dialogs when asserts fire.',
- 'Asserts are only enabled in debug builds'],
- 'category' : 'debug',
- }],
-
- ['SINGLE_THREADED', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['If enabled will perform all rendering on the API thread.',
- 'This is useful mainly for debugging purposes.'],
- 'category' : 'debug',
- }],
-
- ['DUMP_SHADER_IR', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Dumps shader LLVM IR at various stages of jit compilation.'],
- 'category' : 'debug',
- }],
-
- ['USE_GENERIC_STORETILE', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Always use generic function for performing StoreTile.',
- 'Will be slightly slower than using optimized (jitted) path'],
- 'category' : 'debug_adv',
- }],
-
- ['FAST_CLEAR', {
- 'type' : 'bool',
- 'default' : 'true',
- 'desc' : ['Replace 3D primitive execute with a SWRClearRT operation and',
- 'defer clear execution to first backend op on hottile, or hottile store'],
- 'category' : 'perf_adv',
- }],
-
- ['MAX_NUMA_NODES', {
- 'type' : 'uint32_t',
- 'default' : '1' if sys.platform == 'win32' else '0',
- 'desc' : ['Maximum # of NUMA-nodes per system used for worker threads',
- ' 0 == ALL NUMA-nodes in the system',
- ' N == Use at most N NUMA-nodes for rendering'],
- 'category' : 'perf',
- }],
-
- ['MAX_CORES_PER_NUMA_NODE', {
- 'type' : 'uint32_t',
- 'default' : '0',
- 'desc' : ['Maximum # of cores per NUMA-node used for worker threads.',
- ' 0 == ALL non-API thread cores per NUMA-node',
- ' N == Use at most N cores per NUMA-node'],
- 'category' : 'perf',
- }],
-
- ['MAX_THREADS_PER_CORE', {
- 'type' : 'uint32_t',
- 'default' : '1',
- 'desc' : ['Maximum # of (hyper)threads per physical core used for worker threads.',
- ' 0 == ALL hyper-threads per core',
- ' N == Use at most N hyper-threads per physical core'],
- 'category' : 'perf',
- }],
-
- ['MAX_WORKER_THREADS', {
- 'type' : 'uint32_t',
- 'default' : '0',
- 'desc' : ['Maximum worker threads to spawn.',
- '',
- 'IMPORTANT: If this is non-zero, no worker threads will be bound to',
- 'specific HW threads. They will all be "floating" SW threads.',
- 'In this case, the above 3 KNOBS will be ignored.'],
- 'category' : 'perf',
- }],
-
- ['BASE_NUMA_NODE', {
- 'type' : 'uint32_t',
- 'default' : '0',
- 'desc' : ['Starting NUMA node index to use when allocating compute resources.',
- 'Setting this to a non-zero value will reduce the maximum # of NUMA nodes used.'],
- 'category' : 'perf',
- }],
-
- ['BASE_CORE', {
- 'type' : 'uint32_t',
- 'default' : '0',
- 'desc' : ['Starting core index to use when allocating compute resources.',
- 'Setting this to a non-zero value will reduce the maximum # of cores used.'],
- 'category' : 'perf',
- }],
-
- ['BASE_THREAD', {
- 'type' : 'uint32_t',
- 'default' : '0',
- 'desc' : ['Starting thread index to use when allocating compute resources.',
- 'Setting this to a non-zero value will reduce the maximum # of threads used.'],
- 'category' : 'perf',
- }],
-
- ['BUCKETS_START_FRAME', {
- 'type' : 'uint32_t',
- 'default' : '1200',
- 'desc' : ['Frame from when to start saving buckets data.',
- '',
- 'NOTE: KNOB_ENABLE_RDTSC must be enabled in core/knobs.h',
- 'for this to have an effect.'],
- 'category' : 'perf_adv',
- }],
-
- ['BUCKETS_END_FRAME', {
- 'type' : 'uint32_t',
- 'default' : '1400',
- 'desc' : ['Frame at which to stop saving buckets data.',
- '',
- 'NOTE: KNOB_ENABLE_RDTSC must be enabled in core/knobs.h',
- 'for this to have an effect.'],
- 'category' : 'perf_adv',
- }],
-
- ['WORKER_SPIN_LOOP_COUNT', {
- 'type' : 'uint32_t',
- 'default' : '5000',
- 'desc' : ['Number of spin-loop iterations worker threads will perform',
- 'before going to sleep when waiting for work'],
- 'category' : 'perf_adv',
- }],
-
- ['MAX_DRAWS_IN_FLIGHT', {
- 'type' : 'uint32_t',
- 'default' : '256',
- 'desc' : ['Maximum number of draws outstanding before API thread blocks.',
- 'This value MUST be evenly divisible into 2^32'],
- 'category' : 'perf_adv',
- }],
-
- ['MAX_PRIMS_PER_DRAW', {
- 'type' : 'uint32_t',
- 'default' : '49152',
- 'desc' : ['Maximum primitives in a single Draw().',
- 'Larger primitives are split into smaller Draw calls.',
- 'Should be a multiple of (3 * vectorWidth).'],
- 'category' : 'perf_adv',
- }],
-
- ['MAX_TESS_PRIMS_PER_DRAW', {
- 'type' : 'uint32_t',
- 'default' : '16',
- 'desc' : ['Maximum primitives in a single Draw() with tessellation enabled.',
- 'Larger primitives are split into smaller Draw calls.',
- 'Should be a multiple of (vectorWidth).'],
- 'category' : 'perf_adv',
- }],
-
-
- ['DEBUG_OUTPUT_DIR', {
- 'type' : 'std::string',
- 'default' : r'%TEMP%\Rast\DebugOutput' if sys.platform == 'win32' else '/tmp/Rast/DebugOutput',
- 'desc' : ['Output directory for debug data.'],
- 'category' : 'debug',
- }],
-
- ['JIT_ENABLE_CACHE', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Enables caching of compiled shaders'],
- 'category' : 'debug_adv',
- }],
-
- ['JIT_OPTIMIZATION_LEVEL', {
- 'type' : 'int',
- 'default' : '-1',
- 'desc' : ['JIT compile optimization level:',],
- 'category' : 'debug',
- 'control' : 'dropdown',
- 'choices' : [
- {
- 'name' : 'Automatic',
- 'desc' : 'Automatic based on other KNOB and build settings',
- 'value' : -1,
- },
- {
- 'name' : 'Debug',
- 'desc' : 'No optimization: -O0',
- 'value' : 0,
- },
- {
- 'name' : 'Less',
- 'desc' : 'Some optimization: -O1',
- 'value' : 1,
- },
- {
- 'name' : 'Optimize',
- 'desc' : 'Default Clang / LLVM optimizations: -O2',
- 'value' : 2,
- },
- {
- 'name' : 'Aggressive',
- 'desc' : 'Maximum optimization: -O3',
- 'value' : 3,
- },
- ],
- }],
-
- ['JIT_CACHE_DIR', {
- 'type' : 'std::string',
- 'default' : r'%TEMP%\SWR\JitCache' if sys.platform == 'win32' else '${HOME}/.swr/jitcache',
- 'desc' : ['Cache directory for compiled shaders.'],
- 'category' : 'debug',
- }],
-
- ['TOSS_DRAW', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Disable per-draw/dispatch execution'],
- 'category' : 'perf',
- }],
-
- ['TOSS_QUEUE_FE', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Stop per-draw execution at worker FE',
- '',
- 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
- 'category' : 'perf_adv',
- }],
-
- ['TOSS_FETCH', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Stop per-draw execution at vertex fetch',
- '',
- 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
- 'category' : 'perf_adv',
- }],
-
- ['TOSS_IA', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Stop per-draw execution at input assembler',
- '',
- 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
- 'category' : 'perf_adv',
- }],
-
- ['TOSS_VS', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Stop per-draw execution at vertex shader',
- '',
- 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
- 'category' : 'perf_adv',
- }],
-
- ['TOSS_SETUP_TRIS', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Stop per-draw execution at primitive setup',
- '',
- 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
- 'category' : 'perf_adv',
- }],
-
- ['TOSS_BIN_TRIS', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Stop per-draw execution at primitive binning',
- '',
- 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
- 'category' : 'perf_adv',
- }],
-
- ['TOSS_RS', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Stop per-draw execution at rasterizer',
- '',
- 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'],
- 'category' : 'perf_adv',
- }],
-
- ['DISABLE_SPLIT_DRAW', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Don\'t split large draws into smaller draws.,',
- 'MAX_PRIMS_PER_DRAW and MAX_TESS_PRIMS_PER_DRAW can be used to control split size.',
- '',
- 'Useful to disable split draws for gathering archrast stats.'],
- 'category' : 'perf_adv',
- }],
-
- ['AR_ENABLE_PIPELINE_STATS', {
- 'type' : 'bool',
- 'default' : 'true',
- 'desc' : ['Enable pipeline stats when using Archrast'],
- 'category' : 'archrast',
- }],
-
- ['AR_ENABLE_SHADER_STATS', {
- 'type' : 'bool',
- 'default' : 'true',
- 'desc' : ['Enable shader stats when using Archrast'],
- 'category' : 'archrast',
- }],
-
- ['AR_ENABLE_SWTAG_DATA', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Enable SWTag data when using Archrast'],
- 'category' : 'archrast',
- }],
-
- ['AR_ENABLE_SWR_EVENTS', {
- 'type' : 'bool',
- 'default' : 'true',
- 'desc' : ['Enable internal SWR events when using Archrast'],
- 'category' : 'archrast',
- }],
-
- ['AR_ENABLE_PIPELINE_EVENTS', {
- 'type' : 'bool',
- 'default' : 'true',
- 'desc' : ['Enable pipeline events when using Archrast'],
- 'category' : 'archrast',
- }],
-
- ['AR_ENABLE_SHADER_EVENTS', {
- 'type' : 'bool',
- 'default' : 'true',
- 'desc' : ['Enable shader events when using Archrast'],
- 'category' : 'archrast',
- }],
-
- ['AR_ENABLE_SWTAG_EVENTS', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Enable SWTag events when using Archrast'],
- 'category' : 'archrast',
- }],
-
- ['AR_ENABLE_MEMORY_EVENTS', {
- 'type' : 'bool',
- 'default' : 'false',
- 'desc' : ['Enable memory events when using Archrast'],
- 'category' : 'archrast',
- }],
-
- ['AR_MEM_SET_BYTE_GRANULARITY', {
- 'type' : 'uint32_t',
- 'default' : '64',
- 'desc' : ['Granularity and alignment of tracking of memory accesses',
- 'ONLY ACTIVE UNDER ArchRast.'],
- 'category' : 'archrast',
- }],
-
-
- ]
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/meson.build b/src/gallium/drivers/swr/rasterizer/codegen/meson.build
deleted file mode 100644
index daf79ed4c26..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/meson.build
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright © 2017-2018 Intel Corporation
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-gen_knobs_cpp = custom_target(
- 'gen_knobs.cpp',
- input : ['gen_knobs.py'],
- output : 'gen_knobs.cpp',
- command : [prog_python, '@INPUT0@', '--output', '@OUTPUT@', '--gen_cpp'],
- depend_files : files(
- 'knob_defs.py', 'gen_common.py',
- 'templates/gen_knobs.cpp',
- ),
-)
-
-gen_knobs_h = custom_target(
- 'gen_knobs.h',
- input : ['gen_knobs.py'],
- output : 'gen_knobs.h',
- command : [prog_python, '@INPUT0@', '--output', '@OUTPUT@', '--gen_h'],
- depend_files : files(
- 'knob_defs.py', 'gen_common.py',
- 'templates/gen_knobs.h',
- ),
-)
-
-
-# The generators above this are needed individually, while the below generators
-# are all inputs to the same lib, so they don't need unique names.
-files_swr_common += [
- gen_builder_hpp, gen_builder_meta_hpp, gen_knobs_h, gen_knobs_cpp
-]
-
-foreach x : [[swr_context_files, 'gen_swr_context_llvm.h'],
- [swr_state_files, 'gen_state_llvm.h'],
- [swr_surf_state_files, 'gen_surf_state_llvm.h']]
- files_swr_common += custom_target(
- x[1],
- input : ['gen_llvm_types.py', x[0]],
- output : x[1],
- command : [prog_python, '@INPUT0@', '--input', '@INPUT1@', '--output', '@OUTPUT@'],
- depend_files : files(
- 'templates/gen_llvm.hpp',
- 'gen_common.py',
- ),
- )
-endforeach
-
-ar_output_filenames = ['gen_ar_event.hpp', 'gen_ar_event.cpp', 'gen_ar_eventhandler.hpp', 'gen_ar_eventhandlerfile.hpp']
-ar_template_filenames = []
-foreach fname : ar_output_filenames
- ar_template_filenames += join_paths('templates', fname)
-endforeach
-
-files_swr_common += custom_target(
- 'gen_archrast',
- input : ['gen_archrast.py', swr_event_proto_files, swr_event_pproto_files],
- output : ar_output_filenames,
- command : [prog_python, '@INPUT0@', '--proto', '@INPUT1@', '@INPUT2@', '--output-dir', meson.current_build_dir()],
- depend_files : files('gen_common.py', ar_template_filenames)
-)
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp
deleted file mode 100644
index e73a8110ee1..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file ${filename}
- *
- * @brief Implementation for events. auto-generated file
- *
- * DO NOT EDIT
- *
- * Generation Command Line:
- * ${'\n * '.join(cmdline)}
- *
- ******************************************************************************/
-// clang-format off
-#include "common/os.h"
-#include "gen_ar_event.hpp"
-#include "gen_ar_eventhandler.hpp"
-
-using namespace ArchRast;
-
-<% sorted_groups = sorted(protos['events']['groups']) %>
-% for group in sorted_groups:
-% for event_key in protos['events']['groups'][group]:
-<%
- event = protos['events']['defs'][event_key]
-%>
-void ${event['name']}::Accept(EventHandler* pHandler) const
-{
- pHandler->Handle(*this);
-}
-% endfor
-% endfor
-
-
-// clang-format on
-
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp
deleted file mode 100644
index 3ef99da2249..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp
+++ /dev/null
@@ -1,168 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file ${filename}
- *
- * @brief Definitions for events. auto-generated file
- *
- * DO NOT EDIT
- *
- * Generation Command Line:
- * ${'\n * '.join(cmdline)}
- *
- ******************************************************************************/
-// clang-format off
-#pragma once
-
-#include "common/os.h"
-#include "core/state.h"
-
-<%
- always_enabled_knob_groups = ['Framework', 'SWTagFramework', 'ApiSwr']
- group_knob_remap_table = {
- "ShaderStats": "KNOB_AR_ENABLE_SHADER_STATS",
- "PipelineStats" : "KNOB_AR_ENABLE_PIPELINE_STATS",
- "SWTagData" : "KNOB_AR_ENABLE_SWTAG_DATA",
- }
-%>
-namespace ArchRast
-{
-<% sorted_enums = sorted(protos['enums']['defs']) %>
-% for name in sorted_enums:
- enum ${name}
- {<% names = protos['enums']['defs'][name]['names'] %>
- % for i in range(len(names)):
- ${names[i].lstrip()}
- % endfor
- };
-% endfor
-
- // Forward decl
- class EventHandler;
-
- //////////////////////////////////////////////////////////////////////////
- /// Event - base interface implemented by every generated event.
- //////////////////////////////////////////////////////////////////////////
- struct Event
- {
- const uint32_t eventId = {0xFFFFFFFF};
- Event() {}
- virtual ~Event() {}
-
- virtual bool IsEnabled() const { return true; };
- virtual const uint32_t GetEventId() const = 0;
- virtual void Accept(EventHandler* pHandler) const = 0;
- };
-
-<% sorted_groups = sorted(protos['events']['groups']) %>
-% for group in sorted_groups:
- % for event_key in protos['events']['groups'][group]:
-<%
- event = protos['events']['defs'][event_key]
-%>
- //////////////////////////////////////////////////////////////////////////
- /// ${event_key}Data
- //////////////////////////////////////////////////////////////////////////
-#pragma pack(push, 1)
- struct ${event['name']}Data
- {<%
- fields = event['fields'] %>
- // Fields
- % for i in range(len(fields)):
- % if fields[i]['size'] > 1:
- ${fields[i]['type']} ${fields[i]['name']}[${fields[i]['size']}];
- % else:
- ${fields[i]['type']} ${fields[i]['name']};
- % endif
- % endfor
- };
-#pragma pack(pop)
-
- //////////////////////////////////////////////////////////////////////////
- /// ${event_key}
- //////////////////////////////////////////////////////////////////////////
- struct ${event['name']} : Event
- {<%
- fields = event['fields'] %>
- const uint32_t eventId = {${ event['id'] }};
- ${event['name']}Data data;
-
- // Constructor
- ${event['name']}(
- % for i in range(len(fields)):
- % if i < len(fields)-1:
- % if fields[i]['size'] > 1:
- ${fields[i]['type']}* ${fields[i]['name']},
- uint32_t ${fields[i]['name']}_size,
- % else:
- ${fields[i]['type']} ${fields[i]['name']},
- % endif
- % endif
- % if i == len(fields)-1:
- % if fields[i]['size'] > 1:
- ${fields[i]['type']}* ${fields[i]['name']},
- uint32_t ${fields[i]['name']}_size
- % else:
- ${fields[i]['type']} ${fields[i]['name']}
- % endif
- % endif
- % endfor
- )
- {
- % for i in range(len(fields)):
- % if fields[i]['size'] > 1:
- % if fields[i]['type'] == 'char':
- // Copy size of string (null-terminated) followed by string into entire buffer
- SWR_ASSERT(${fields[i]['name']}_size + 1 < ${fields[i]['size']} - sizeof(uint32_t), "String length must be less than size of char buffer - size(uint32_t)!");
- memcpy(data.${fields[i]['name']}, &${fields[i]['name']}_size, sizeof(uint32_t));
- strcpy_s(data.${fields[i]['name']} + sizeof(uint32_t), ${fields[i]['name']}_size + 1, ${fields[i]['name']});
- % else:
- memcpy(data.${fields[i]['name']}, ${fields[i]['name']}, ${fields[i]['name']}_size);
- % endif
- % else:
- data.${fields[i]['name']} = ${fields[i]['name']};
- % endif
- % endfor
- }
-
- virtual void Accept(EventHandler* pHandler) const;
- inline const uint32_t GetEventId() const { return eventId; }
- % if group not in always_enabled_knob_groups:
- <%
- if group in group_knob_remap_table:
- group_knob_define = group_knob_remap_table[group]
- else:
- group_knob_define = 'KNOB_AR_ENABLE_' + group.upper() + '_EVENTS'
- %>
- bool IsEnabled() const
- {
- static const bool IsEventEnabled = true; // TODO: Replace with knob for each event
- return ${group_knob_define} && IsEventEnabled;
- }
- % endif
- };
-
- % endfor
-
-% endfor
-} // namespace ArchRast
-// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp
deleted file mode 100644
index d3e82e8a4ee..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file ${filename}
- *
- * @brief Event handler interface. auto-generated file
- *
- * DO NOT EDIT
- *
- * Generation Command Line:
- * ${'\n * '.join(cmdline)}
- *
- ******************************************************************************/
-// clang-format off
-#pragma once
-
-#include "${event_header}"
-
-namespace ArchRast
-{
- //////////////////////////////////////////////////////////////////////////
- /// EventHandler - interface for handling events.
- //////////////////////////////////////////////////////////////////////////
- class EventHandler
- {
- public:
- EventHandler() {}
- virtual ~EventHandler() {}
-
- virtual void FlushDraw(uint32_t drawId) {}
-
-<% sorted_groups = sorted(protos['events']['groups']) %>
-% for group in sorted_groups:
-% for event_key in protos['events']['groups'][group]:
-<%
- event = protos['events']['defs'][event_key]
-%> virtual void Handle(const ${event['name']}& event) {}
-% endfor
-% endfor
- };
-} // namespace ArchRast
-// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp
deleted file mode 100644
index ba5a51700f3..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp
+++ /dev/null
@@ -1,174 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file ${filename}
- *
- * @brief File-backed event handler. auto-generated file
- *
- * DO NOT EDIT
- *
- * Generation Command Line:
- * ${'\n * '.join(cmdline)}
- *
- ******************************************************************************/
-// clang-format off
-#pragma once
-
-#include "common/os.h"
-#include "${event_header}"
-#include <fstream>
-#include <sstream>
-#include <iostream>
-#include <thread>
-
-namespace ArchRast
-{
- //////////////////////////////////////////////////////////////////////////
- /// EventHandlerFile - event handler that buffers events and appends them to a binary file.
- //////////////////////////////////////////////////////////////////////////
- class EventHandlerFile : public EventHandler
- {
- public:
- EventHandlerFile(uint32_t id) : mBufOffset(0)
- {
-#if defined(_WIN32)
- DWORD pid = GetCurrentProcessId();
- TCHAR procname[MAX_PATH];
- GetModuleFileName(NULL, procname, MAX_PATH);
- const char* pBaseName = strrchr(procname, '\\');
- std::stringstream outDir;
- outDir << KNOB_DEBUG_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
- mOutputDir = outDir.str();
- if (CreateDirectory(mOutputDir.c_str(), NULL))
- {
- std::cout << std::endl
- << "ArchRast Dir: " << mOutputDir << std::endl
- << std::endl
- << std::flush;
- }
-
- // There could be multiple threads creating thread pools. We
- // want to make sure they are uniquely identified by adding in
- // the creator's thread id into the filename.
- std::stringstream fstr;
- fstr << outDir.str().c_str() << "\\ar_event" << std::this_thread::get_id();
- fstr << "_" << id << ".bin" << std::ends;
- mFilename = fstr.str();
-#else
- // There could be multiple threads creating thread pools. We
- // want to make sure they are uniquely identified by adding in
- // the creator's thread id into the filename.
- std::stringstream fstr;
- fstr << "/tmp/ar_event" << std::this_thread::get_id();
- fstr << "_" << id << ".bin" << std::ends;
- mFilename = fstr.str();
-#endif
- }
-
- virtual ~EventHandlerFile() { FlushBuffer(); }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Flush buffer to file.
- bool FlushBuffer()
- {
- if (mBufOffset > 0)
- {
- if (mBufOffset == mHeaderBufOffset)
- {
- // Nothing to flush. Only header has been generated.
- return false;
- }
-
- std::ofstream file;
- file.open(mFilename, std::ios::out | std::ios::app | std::ios::binary);
-
- if (!file.is_open())
- {
- SWR_INVALID("ArchRast: Could not open event file!");
- return false;
- }
-
- file.write((char*)mBuffer, mBufOffset);
- file.close();
-
- mBufOffset = 0;
- mHeaderBufOffset = 0; // Reset header offset so it's no longer considered.
- }
- return true;
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Write event and its payload to the memory buffer.
- /// @param eventId - id copied into the buffer ahead of the payload.
- /// @param pBlock  - payload; size bytes are copied from it.
- /// @param size    - payload size in bytes.
- /// @note The record is dropped when it can never fit in mBuffer, or when
- ///       the buffer is full and FlushBuffer() reports failure.
- void Write(uint32_t eventId, const char* pBlock, uint32_t size)
- {
-     // Widen before adding so a very large size cannot wrap a 32-bit sum.
-     const uint64_t recordSize = static_cast<uint64_t>(size) + sizeof(eventId);
-
-     if (recordSize > mBufferSize)
-     {
-         // A record larger than the whole buffer cannot be stored; copying it
-         // would run past the end of mBuffer even right after a flush.
-         SWR_INVALID("ArchRast: Event too large for event buffer!");
-         return;
-     }
-
-     if ((mBufOffset + recordSize) > mBufferSize)
-     {
-         if (!FlushBuffer())
-         {
-             // Don't corrupt what's already in the buffer?
-             /// @todo Maybe add corrupt marker to buffer here in case we can open file in
-             /// future?
-             return;
-         }
-     }
-
-     // Record layout: eventId immediately followed by the raw payload bytes.
-     memcpy(&mBuffer[mBufOffset], (char*)&eventId, sizeof(eventId));
-     mBufOffset += sizeof(eventId);
-     memcpy(&mBuffer[mBufOffset], pBlock, size);
-     mBufOffset += size;
- }
-<% sorted_groups = sorted(protos['events']['groups']) %>
-% for group in sorted_groups:
-% for event_key in protos['events']['groups'][group]:
-<%
- event = protos['events']['defs'][event_key]
-%>
- //////////////////////////////////////////////////////////////////////////
- /// @brief Handle ${event_key} event
- virtual void Handle(const ${event['name']}& event)
- {
-% if event['num_fields'] == 0:
- Write(event.eventId, (char*)&event.data, 0);
-% else:
- Write(event.eventId, (char*)&event.data, sizeof(event.data));
-% endif
- }
-% endfor
-% endfor
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Everything written to buffer up to this point is the header.
- virtual void MarkHeader()
- {
- mHeaderBufOffset = mBufOffset;
- }
-
- std::string mFilename;
- std::string mOutputDir;
-
- static const uint32_t mBufferSize = 1024;
- uint8_t mBuffer[mBufferSize];
- uint32_t mBufOffset{0};
- uint32_t mHeaderBufOffset{0};
- };
-} // namespace ArchRast
-// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_backend.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_backend.cpp
deleted file mode 100644
index b8da5298f3d..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_backend.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-//============================================================================
-// Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a
-// copy of this software and associated documentation files (the "Software"),
-// to deal in the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice (including the next
-// paragraph) shall be included in all copies or substantial portions of the
-// Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-// IN THE SOFTWARE.
-//
-// @file BackendPixelRate${fileNum}.cpp
-//
-// @brief auto-generated file
-//
-// DO NOT EDIT
-//
-// Generation Command Line:
-// ${'\n// '.join(cmdline)}
-//
-//============================================================================
-
-#include "core/backend.h"
-#include "core/backend_impl.h"
-
-void InitBackendPixelRate${fileNum}()
-{
- %for func in funcList:
- ${func}
- %endfor
-}
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp
deleted file mode 100644
index da1ca87620a..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp
+++ /dev/null
@@ -1,84 +0,0 @@
-//============================================================================
-// Copyright (C) 2014-2020 Intel Corporation. All Rights Reserved.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a
-// copy of this software and associated documentation files (the "Software"),
-// to deal in the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice (including the next
-// paragraph) shall be included in all copies or substantial portions of the
-// Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-// IN THE SOFTWARE.
-//
-// @file ${filename}
-//
-// @brief auto-generated file
-//
-// DO NOT EDIT
-//
-// Generation Command Line:
-// ${'\n// '.join(cmdline)}
-//
-//============================================================================
-// clang-format off
-#pragma once
-
-//============================================================================
-// Auto-generated ${comment}
-//============================================================================
-%for func in functions:
-<%argList = ', '.join(func['args'])%>\
-${func['decl']}
-{
-%if isX86:
- %if len(func['args']) != 0:
- SmallVector<Type*, ${len(func['args'])}> argTypes;
- %for arg in func['args']:
- argTypes.push_back(${arg}->getType());
- %endfor
-#if LLVM_VERSION_MAJOR >= 12
- #define VEC_GET_NUM_ELEMS cast<FixedVectorType>(a->getType())->getNumElements()
-#elif LLVM_VERSION_MAJOR >= 11
- #define VEC_GET_NUM_ELEMS cast<VectorType>(a->getType())->getNumElements()
-#else
- #define VEC_GET_NUM_ELEMS a->getType()->getVectorNumElements()
-#endif
- FunctionType* pFuncTy = FunctionType::get(${ func['returnType'] }, argTypes, false);
- %else:
- FunctionType* pFuncTy = FunctionType::get(${ func['returnType'] }, {}, false);
- %endif:
-#if LLVM_VERSION_MAJOR >= 9
- Function* pFunc = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("meta.intrinsic.${func['name']}", pFuncTy).getCallee());
-#else
- Function* pFunc = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("meta.intrinsic.${func['name']}", pFuncTy));
-#endif
- return CALL(pFunc, std::initializer_list<Value*>{${argList}}, name);
-%elif isIntrin:
- %if len(func['types']) != 0:
- SmallVector<Type*, ${len(func['types'])}> args;
- %for arg in func['types']:
- args.push_back(${arg}->getType());
- %endfor
- Function* pFunc = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::${func['intrin']}, args);
- return CALL(pFunc, std::initializer_list<Value*>{${argList}}, name);
- %else:
- Function* pFunc = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::${func['intrin']});
- return CALL(pFunc, std::initializer_list<Value*>{${argList}}, name);
- %endif
-%else:
- return IRB()->${func['intrin']}(${argList});
-%endif
-}
-
-% endfor
- // clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_header_init.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_header_init.hpp
deleted file mode 100644
index d0682c55f03..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_header_init.hpp
+++ /dev/null
@@ -1,46 +0,0 @@
-//============================================================================
-// Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a
-// copy of this software and associated documentation files (the "Software"),
-// to deal in the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice (including the next
-// paragraph) shall be included in all copies or substantial portions of the
-// Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-// IN THE SOFTWARE.
-//
-// @file ${filename}
-//
-// @brief auto-generated file
-//
-// DO NOT EDIT
-//
-// Generation Command Line:
-// ${'\n// '.join(cmdline)}
-//
-//============================================================================
-
-// clang-format off
-
-%for num in range(numFiles):
-void Init${tableName}${num}();
-%endfor
-
-static INLINE void Init${tableName}()
-{
- %for num in range(numFiles):
- Init${tableName}${num}();
- %endfor
-}
-// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
deleted file mode 100644
index 194499aa1e0..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-/******************************************************************************
- * Copyright (C) 2015-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file ${filename}.cpp
- *
- * @brief Dynamic Knobs for Core.
- *
- * ======================= AUTO GENERATED: DO NOT EDIT !!! ====================
- *
- * Generation Command Line:
- * ${'\n * '.join(cmdline)}
- *
- ******************************************************************************/
-// clang-format off
-<% calc_max_knob_len(knobs) %>
-% for inc in includes:
-#include <${inc}>
-% endfor
-#include <regex>
-#include <core/utils.h>
-
-//========================================================
-// Implementation
-//========================================================
-void KnobBase::autoExpandEnvironmentVariables(std::string& text)
-{
- size_t start;
- while ((start = text.find("${'${'}")) != std::string::npos)
- {
- size_t end = text.find("}");
- if (end == std::string::npos)
- break;
- const std::string var = GetEnv(text.substr(start + 2, end - start - 2));
- text.replace(start, end - start + 1, var);
- }
- // win32 style variable replacement
- while ((start = text.find("%")) != std::string::npos)
- {
- size_t end = text.find("%", start + 1);
- if (end == std::string::npos)
- break;
- const std::string var = GetEnv(text.substr(start + 1, end - start - 1));
- text.replace(start, end - start + 1, var);
- }
-}
-
-//========================================================
-// Static Data Members
-//========================================================
-% for knob in knobs:
-% if knob[1]['type'] == 'std::string':
-${knob[1]['type']} GlobalKnobs::Knob_${knob[0]}::m_default = "${repr(knob[1]['default'])[1:-1]}";
-% else:
-${knob[1]['type']} GlobalKnobs::Knob_${knob[0]}::m_default = ${knob[1]['default']};
-% endif
-% endfor
-GlobalKnobs g_GlobalKnobs;
-
-//========================================================
-// Knob Initialization
-//========================================================
-GlobalKnobs::GlobalKnobs()
-{
- % for knob in knobs :
- InitKnob(${ knob[0] });
- % endfor
-}
-
-//========================================================
-// Knob Display (Convert to String)
-//========================================================
-std::string GlobalKnobs::ToString(const char* optPerLinePrefix)
-{
- std::basic_stringstream<char> str;
- str << std::showbase << std::setprecision(1) << std::fixed;
-
- if (optPerLinePrefix == nullptr)
- {
- optPerLinePrefix = "";
- }
-
- % for knob in knobs:
- str << optPerLinePrefix << "KNOB_${knob[0]}:${space_knob(knob[0])}";
- % if knob[1]['type'] == 'bool':
- str << (KNOB_${knob[0]} ? "+\n" : "-\n");
- % elif knob[1]['type'] != 'float' and knob[1]['type'] != 'std::string':
- str << std::hex << std::setw(11) << std::left << KNOB_${knob[0]};
- str << std::dec << KNOB_${knob[0]} << "\n";
- % else:
- str << KNOB_${knob[0]} << "\n";
- % endif
- % endfor
- str << std::ends;
-
- return str.str();
-}
-<%!
- # Globally available python
- max_len = 0
- def calc_max_knob_len(knobs):
- global max_len
- max_len = 0
- for knob in knobs:
- if len(knob[0]) > max_len: max_len = len(knob[0])
- max_len += len('KNOB_ ')
- if max_len % 4: max_len += 4 - (max_len % 4)
-
- def space_knob(knob):
- knob_len = len('KNOB_' + knob)
- return ' '*(max_len - knob_len)
-
- def calc_max_name_len(choices_array):
- _max_len = 0
- for choice in choices_array:
- if len(choice['name']) > _max_len: _max_len = len(choice['name'])
-
- if _max_len % 4: _max_len += 4 - (_max_len % 4)
- return _max_len
-
- def space_name(name, max_len):
- name_len = len(name)
- return ' '*(max_len - name_len)
-%>
-// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h
deleted file mode 100644
index 8b88a11706c..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/******************************************************************************
- * Copyright (C) 2015-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file ${filename}.h
- *
- * @brief Dynamic Knobs for Core.
- *
- * ======================= AUTO GENERATED: DO NOT EDIT !!! ====================
- *
- * Generation Command Line:
- * ${'\n * '.join(cmdline)}
- *
- ******************************************************************************/
-// clang-format off
-<% calc_max_knob_len(knobs) %>
-#pragma once
-#include <string>
-
-struct KnobBase
-{
-private:
- // Update the input string.
- static void autoExpandEnvironmentVariables(std::string& text);
-
-protected:
- // Leave input alone and return new string.
- static std::string expandEnvironmentVariables(std::string const& input)
- {
- std::string text = input;
- autoExpandEnvironmentVariables(text);
- return text;
- }
-
- template <typename T>
- static T expandEnvironmentVariables(T const& input)
- {
- return input;
- }
-};
-
-template <typename T>
-struct Knob : KnobBase
-{
-public:
- const T& Value() const { return m_Value; }
- const T& Value(T const& newValue)
- {
- m_Value = expandEnvironmentVariables(newValue);
- return Value();
- }
-
-private:
- T m_Value;
-};
-
-#define DEFINE_KNOB(_name, _type) \\
-
- struct Knob_##_name : Knob<_type> \\
-
- { \\
-
- static const char* Name() { return "KNOB_" #_name; } \\
-
- static _type DefaultValue() { return (m_default); } \\
-
- private: \\
-
- static _type m_default; \\
-
- } _name;
-
-#define GET_KNOB(_name) g_GlobalKnobs._name.Value()
-#define SET_KNOB(_name, _newValue) g_GlobalKnobs._name.Value(_newValue)
-
-struct GlobalKnobs
-{
- % for knob in knobs:
- //-----------------------------------------------------------
- // KNOB_${knob[0]}
- //
- % for line in knob[1]['desc']:
- // ${line}
- % endfor
- % if knob[1].get('choices'):
- <%
- choices = knob[1].get('choices')
- _max_len = calc_max_name_len(choices) %>//
- % for i in range(len(choices)):
- // ${choices[i]['name']}${space_name(choices[i]['name'], _max_len)} = ${format(choices[i]['value'], '#010x')}
- % endfor
- % endif
- //
- DEFINE_KNOB(${knob[0]}, ${knob[1]['type']});
-
- % endfor
-
- std::string ToString(const char* optPerLinePrefix="");
- GlobalKnobs();
-};
-extern GlobalKnobs g_GlobalKnobs;
-
-#undef DEFINE_KNOB
-
-% for knob in knobs:
-#define KNOB_${knob[0]}${space_knob(knob[0])} GET_KNOB(${knob[0]})
-% endfor
-
-<%!
- # Globally available python
- max_len = 0
- def calc_max_knob_len(knobs):
- global max_len
- max_len = 0
- for knob in knobs:
- if len(knob[0]) > max_len: max_len = len(knob[0])
- max_len += len('KNOB_ ')
- if max_len % 4: max_len += 4 - (max_len % 4)
-
- def space_knob(knob):
- knob_len = len('KNOB_' + knob)
- return ' '*(max_len - knob_len)
-
- def calc_max_name_len(choices_array):
- _max_len = 0
- for choice in choices_array:
- if len(choice['name']) > _max_len: _max_len = len(choice['name'])
-
- if _max_len % 4: _max_len += 4 - (_max_len % 4)
- return _max_len
-
- def space_name(name, max_len):
- name_len = len(name)
- return ' '*(max_len - name_len)
-%>
-// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp
deleted file mode 100644
index 99a3f300bba..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp
+++ /dev/null
@@ -1,109 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file ${filename}
- *
- * @brief auto-generated file
- *
- * DO NOT EDIT
- *
- * Generation Command Line:
- * ${'\n * '.join(cmdline)}
- *
- ******************************************************************************/
-// clang-format off
-
-#include <llvm/IR/DerivedTypes.h>
-
-#pragma once
-
-namespace SwrJit
-{
- using namespace llvm;
-
-%for type in types:
- INLINE static StructType* Gen_${type['name']}(JitManager* pJitMgr)
- {
- %if needs_ctx(type):
- LLVMContext& ctx = pJitMgr->mContext;
-
- %endif
-#if LLVM_VERSION_MAJOR >= 12
- StructType* pRetType = StructType::getTypeByName(pJitMgr->mContext, "${type['name']}");
-#else
- StructType* pRetType = pJitMgr->mpCurrentModule->getTypeByName("${type['name']}");
-#endif
- if (pRetType == nullptr)
- {
- std::vector<Type*> members =<% (max_type_len, max_name_len) = calc_max_len(type['members']) %>
- {
- %for member in type['members']:
- /* ${member['name']} ${pad(len(member['name']), max_name_len)}*/ ${member['type']},
- %endfor
- };
-
- pRetType = StructType::create(members, "${type['name']}", false);
-
- // Compute debug metadata
- llvm::DIBuilder builder(*pJitMgr->mpCurrentModule);
- llvm::DIFile* pFile = builder.createFile("${input_file}", "${os.path.normpath(input_dir).replace('\\', '/')}");
-
- std::vector<std::pair<std::string, uint32_t>> dbgMembers =
- {
- %for member in type['members']:
- std::make_pair("${member['name']}", ${pad(len(member['name']), max_name_len)}${member['lineNum']}),
- %endfor
- };
- pJitMgr->CreateDebugStructType(pRetType, "${type['name']}", pFile, ${type['lineNum']}, dbgMembers);
- }
-
- return pRetType;
- }
-
- %for member in type['members']:
- static const uint32_t ${type['name']}_${member['name']} ${pad(len(member['name']), max_name_len)}= ${loop.index};
- %endfor
-
-%endfor
-} // namespace SwrJit
-
-<%! # Global function definitions
- import os
- def needs_ctx(struct_type):
- for m in struct_type.get('members', []):
- if '(ctx)' in m.get('type', ''):
- return True
- return False
-
- def calc_max_len(fields):
- max_type_len = 0
- max_name_len = 0
- for f in fields:
- if len(f['type']) > max_type_len: max_type_len = len(f['type'])
- if len(f['name']) > max_name_len: max_name_len = len(f['name'])
- return (max_type_len, max_name_len)
-
- def pad(cur_len, max_len):
- pad_amt = max_len - cur_len
- return ' '*pad_amt
-%>
-// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_rasterizer.cpp
deleted file mode 100644
index 92e0f406235..00000000000
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_rasterizer.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-//============================================================================
-// Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a
-// copy of this software and associated documentation files (the "Software"),
-// to deal in the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice (including the next
-// paragraph) shall be included in all copies or substantial portions of the
-// Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-// IN THE SOFTWARE.
-//
-// @file gen_rasterizer${fileNum}.cpp
-//
-// @brief auto-generated file
-//
-// DO NOT EDIT
-//
-// Generation Command Line:
-// ${'\n// '.join(cmdline)}
-//
-//============================================================================
-// clang-format off
-
-#include "core/rasterizer.h"
-#include "core/rasterizer_impl.h"
-
-void InitRasterizerFuncs${fileNum}()
-{
- %for func in funcList:
- ${func}
- %endfor
-}
-// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/common/formats.cpp b/src/gallium/drivers/swr/rasterizer/common/formats.cpp
deleted file mode 100644
index e0800f5e88e..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/formats.cpp
+++ /dev/null
@@ -1,9298 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file formats.cpp
- *
- * @brief auto-generated file
- *
- * DO NOT EDIT
- *
- ******************************************************************************/
-
-#include "formats.h"
-
-// lookup table for unorm8 srgb -> float conversion
-const uint32_t srgb8Table[256] = {
- 0x00000000, 0x399f22b4, 0x3a1f22b4, 0x3a6eb40f, 0x3a9f22b4, 0x3ac6eb61, 0x3aeeb40f, 0x3b0b3e5e,
- 0x3b1f22b4, 0x3b33070b, 0x3b46eb61, 0x3b5b518d, 0x3b70f18d, 0x3b83e1c6, 0x3b8fe616, 0x3b9c87fd,
- 0x3ba9c9b5, 0x3bb7ad6f, 0x3bc63549, 0x3bd5635f, 0x3be539c1, 0x3bf5ba70, 0x3c0373b5, 0x3c0c6152,
- 0x3c15a703, 0x3c1f45be, 0x3c293e6b, 0x3c3391f7, 0x3c3e4149, 0x3c494d43, 0x3c54b6c7, 0x3c607eb1,
- 0x3c6ca5dc, 0x3c792d22, 0x3c830aa8, 0x3c89af9f, 0x3c9085db, 0x3c978dc5, 0x3c9ec7c0, 0x3ca63431,
- 0x3cadd37d, 0x3cb5a601, 0x3cbdac20, 0x3cc5e639, 0x3cce54ab, 0x3cd6f7d3, 0x3cdfd00e, 0x3ce8ddb9,
- 0x3cf22131, 0x3cfb9ac6, 0x3d02a56c, 0x3d0798df, 0x3d0ca7e7, 0x3d11d2b0, 0x3d171965, 0x3d1c7c31,
- 0x3d21fb3c, 0x3d2796b2, 0x3d2d4ebe, 0x3d332384, 0x3d39152e, 0x3d3f23e6, 0x3d454fd4, 0x3d4b991f,
- 0x3d51ffef, 0x3d58846a, 0x3d5f26b7, 0x3d65e6fe, 0x3d6cc564, 0x3d73c20f, 0x3d7add25, 0x3d810b66,
- 0x3d84b795, 0x3d887330, 0x3d8c3e4a, 0x3d9018f6, 0x3d940345, 0x3d97fd4a, 0x3d9c0716, 0x3da020bb,
- 0x3da44a4b, 0x3da883d7, 0x3daccd70, 0x3db12728, 0x3db59110, 0x3dba0b38, 0x3dbe95b5, 0x3dc33092,
- 0x3dc7dbe2, 0x3dcc97b6, 0x3dd1641f, 0x3dd6412c, 0x3ddb2eef, 0x3de02d77, 0x3de53cd5, 0x3dea5d19,
- 0x3def8e55, 0x3df4d093, 0x3dfa23e8, 0x3dff8861, 0x3e027f07, 0x3e054282, 0x3e080ea5, 0x3e0ae379,
- 0x3e0dc107, 0x3e10a755, 0x3e13966c, 0x3e168e53, 0x3e198f11, 0x3e1c98ae, 0x3e1fab32, 0x3e22c6a3,
- 0x3e25eb09, 0x3e29186c, 0x3e2c4ed2, 0x3e2f8e45, 0x3e32d6c8, 0x3e362865, 0x3e398322, 0x3e3ce706,
- 0x3e405419, 0x3e43ca62, 0x3e4749e8, 0x3e4ad2b1, 0x3e4e64c6, 0x3e52002b, 0x3e55a4e9, 0x3e595307,
- 0x3e5d0a8b, 0x3e60cb7c, 0x3e6495e0, 0x3e6869bf, 0x3e6c4720, 0x3e702e08, 0x3e741e7f, 0x3e78188c,
- 0x3e7c1c38, 0x3e8014c2, 0x3e82203c, 0x3e84308d, 0x3e8645ba, 0x3e885fc5, 0x3e8a7eb2, 0x3e8ca283,
- 0x3e8ecb3d, 0x3e90f8e1, 0x3e932b74, 0x3e9562f8, 0x3e979f71, 0x3e99e0e2, 0x3e9c274e, 0x3e9e72b7,
- 0x3ea0c322, 0x3ea31892, 0x3ea57308, 0x3ea7d289, 0x3eaa3718, 0x3eaca0b7, 0x3eaf0f69, 0x3eb18333,
- 0x3eb3fc16, 0x3eb67a15, 0x3eb8fd34, 0x3ebb8576, 0x3ebe12e1, 0x3ec0a571, 0x3ec33d2d, 0x3ec5da17,
- 0x3ec87c33, 0x3ecb2383, 0x3ecdd00b, 0x3ed081cd, 0x3ed338cc, 0x3ed5f50b, 0x3ed8b68d, 0x3edb7d54,
- 0x3ede4965, 0x3ee11ac1, 0x3ee3f16b, 0x3ee6cd67, 0x3ee9aeb6, 0x3eec955d, 0x3eef815d, 0x3ef272ba,
- 0x3ef56976, 0x3ef86594, 0x3efb6717, 0x3efe6e02, 0x3f00bd2b, 0x3f02460c, 0x3f03d1a5, 0x3f055ff8,
- 0x3f06f106, 0x3f0884cf, 0x3f0a1b57, 0x3f0bb49d, 0x3f0d50a2, 0x3f0eef69, 0x3f1090f2, 0x3f123540,
- 0x3f13dc53, 0x3f15862d, 0x3f1732cf, 0x3f18e23b, 0x3f1a9471, 0x3f1c4973, 0x3f1e0143, 0x3f1fbbe1,
- 0x3f217950, 0x3f23398f, 0x3f24fca2, 0x3f26c288, 0x3f288b43, 0x3f2a56d5, 0x3f2c253f, 0x3f2df681,
- 0x3f2fca9e, 0x3f31a197, 0x3f337b6c, 0x3f355820, 0x3f3737b3, 0x3f391a26, 0x3f3aff7e, 0x3f3ce7b7,
- 0x3f3ed2d4, 0x3f40c0d6, 0x3f42b1c0, 0x3f44a592, 0x3f469c4d, 0x3f4895f3, 0x3f4a9284, 0x3f4c9203,
- 0x3f4e9470, 0x3f5099cd, 0x3f52a21a, 0x3f54ad59, 0x3f56bb8c, 0x3f58ccb3, 0x3f5ae0cf, 0x3f5cf7e2,
- 0x3f5f11ee, 0x3f612ef2, 0x3f634eef, 0x3f6571ec, 0x3f6797e1, 0x3f69c0d8, 0x3f6beccb, 0x3f6e1bc2,
- 0x3f704db6, 0x3f7282b1, 0x3f74baae, 0x3f76f5b3, 0x3f7933b9, 0x3f7b74cb, 0x3f7db8e0, 0x3f800000,
-};
-
-// order must match SWR_FORMAT
-const SWR_FORMAT_INFO gFormatInfo[] = {
-
- // R32G32B32A32_FLOAT (0x0)
- {
- "R32G32B32A32_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {32, 32, 32, 32}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32G32B32A32_SINT (0x1)
- {
- "R32G32B32A32_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {32, 32, 32, 32}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32G32B32A32_UINT (0x2)
- {
- "R32G32B32A32_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {32, 32, 32, 32}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x3)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x4)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R64G64_FLOAT (0x5)
- {
- "R64G64_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {64, 64, 0, 0}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32G32B32X32_FLOAT (0x6)
- {
- "R32G32B32X32_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNUSED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {32, 32, 32, 32}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32G32B32A32_SSCALED (0x7)
- {
- "R32G32B32A32_SSCALED",
- {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {32, 32, 32, 32}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32G32B32A32_USCALED (0x8)
- {
- "R32G32B32A32_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {32, 32, 32, 32}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x9)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xA)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xB)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xC)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xD)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xE)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xF)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x10)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x11)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x12)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x13)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x14)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x15)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x16)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x17)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x18)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x19)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1A)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1C)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1D)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R32G32B32A32_SFIXED (0x20)
- {
- "R32G32B32A32_SFIXED",
- {SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {32, 32, 32, 32}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x21)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x22)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x23)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x24)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x25)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x26)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x27)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x28)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x29)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x2A)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x2B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x2C)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x2D)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x2E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x2F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x30)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x31)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x32)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x33)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x34)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x35)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x36)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x37)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x38)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x39)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x3A)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x3B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x3C)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x3D)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x3E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x3F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R32G32B32_FLOAT (0x40)
- {
- "R32G32B32_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {32, 32, 32, 0}, // Bits per component
- 96, // Bits per element
- 12, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32G32B32_SINT (0x41)
- {
- "R32G32B32_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {32, 32, 32, 0}, // Bits per component
- 96, // Bits per element
- 12, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32G32B32_UINT (0x42)
- {
- "R32G32B32_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {32, 32, 32, 0}, // Bits per component
- 96, // Bits per element
- 12, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x43)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x44)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R32G32B32_SSCALED (0x45)
- {
- "R32G32B32_SSCALED",
- {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {32, 32, 32, 0}, // Bits per component
- 96, // Bits per element
- 12, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32G32B32_USCALED (0x46)
- {
- "R32G32B32_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {32, 32, 32, 0}, // Bits per component
- 96, // Bits per element
- 12, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x47)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x48)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x49)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x4A)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x4B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x4C)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x4D)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x4E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x4F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R32G32B32_SFIXED (0x50)
- {
- "R32G32B32_SFIXED",
- {SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {32, 32, 32, 0}, // Bits per component
- 96, // Bits per element
- 12, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x51)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x52)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x53)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x54)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x55)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x56)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x57)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x58)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x59)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x5A)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x5B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x5C)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x5D)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x5E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x5F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x60)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x61)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x62)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x63)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x64)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x65)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x66)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x67)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x68)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x69)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x6A)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x6B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x6C)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x6D)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x6E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x6F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x70)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x71)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x72)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x73)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x74)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x75)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x76)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x77)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x78)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x79)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x7A)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x7B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x7C)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x7D)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x7E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x7F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R16G16B16A16_UNORM (0x80)
- {
- "R16G16B16A16_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {16, 16, 16, 16}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 65535.0f,
- 1.0f / 65535.0f,
- 1.0f / 65535.0f,
- 1.0f / 65535.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16B16A16_SNORM (0x81)
- {
- "R16G16B16A16_SNORM",
- {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {16, 16, 16, 16}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 32767.0f,
- 1.0f / 32767.0f,
- 1.0f / 32767.0f,
- 1.0f / 32767.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16B16A16_SINT (0x82)
- {
- "R16G16B16A16_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {16, 16, 16, 16}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16B16A16_UINT (0x83)
- {
- "R16G16B16A16_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {16, 16, 16, 16}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16B16A16_FLOAT (0x84)
- {
- "R16G16B16A16_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {16, 16, 16, 16}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32G32_FLOAT (0x85)
- {
- "R32G32_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {32, 32, 0, 0}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32G32_SINT (0x86)
- {
- "R32G32_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {32, 32, 0, 0}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32G32_UINT (0x87)
- {
- "R32G32_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {32, 32, 0, 0}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32_FLOAT_X8X24_TYPELESS (0x88)
- {
- "R32_FLOAT_X8X24_TYPELESS",
- {SWR_TYPE_FLOAT, SWR_TYPE_UNUSED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {32, 32, 0, 0}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // X32_TYPELESS_G8X24_UINT (0x89)
- {
- "X32_TYPELESS_G8X24_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UNUSED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {32, 32, 0, 0}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // L32A32_FLOAT (0x8A)
- {
- "L32A32_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 3, 0, 0}, // Swizzle
- {32, 32, 0, 0}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x8B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x8C)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R64_FLOAT (0x8D)
- {
- "R64_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {64, 0, 0, 0}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16B16X16_UNORM (0x8E)
- {
- "R16G16B16X16_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {16, 16, 16, 16}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16B16X16_FLOAT (0x8F)
- {
- "R16G16B16X16_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNUSED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {16, 16, 16, 16}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x90)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // L32X32_FLOAT (0x91)
- {
- "L32X32_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 3, 0, 0}, // Swizzle
- {32, 32, 0, 0}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // I32X32_FLOAT (0x92)
- {
- "I32X32_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 3, 0, 0}, // Swizzle
- {32, 32, 0, 0}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16B16A16_SSCALED (0x93)
- {
- "R16G16B16A16_SSCALED",
- {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {16, 16, 16, 16}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16B16A16_USCALED (0x94)
- {
- "R16G16B16A16_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {16, 16, 16, 16}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32G32_SSCALED (0x95)
- {
- "R32G32_SSCALED",
- {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {32, 32, 0, 0}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32G32_USCALED (0x96)
- {
- "R32G32_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {32, 32, 0, 0}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x97)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x98)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x99)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x9A)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x9B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x9C)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x9D)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x9E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x9F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R32G32_SFIXED (0xA0)
- {
- "R32G32_SFIXED",
- {SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {32, 32, 0, 0}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0xA1)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xA2)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xA3)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xA4)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xA5)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xA6)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xA7)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xA8)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xA9)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xAA)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xAB)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xAC)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xAD)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xAE)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xAF)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xB0)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xB1)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xB2)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xB3)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xB4)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xB5)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xB6)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xB7)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xB8)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xB9)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xBA)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xBB)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xBC)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xBD)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xBE)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xBF)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // B8G8R8A8_UNORM (0xC0)
- {
- "B8G8R8A8_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B8G8R8A8_UNORM_SRGB (0xC1)
- {
- "B8G8R8A8_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R10G10B10A2_UNORM (0xC2)
- {
- "R10G10B10A2_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R10G10B10A2_UNORM_SRGB (0xC3)
- {
- "R10G10B10A2_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R10G10B10A2_UINT (0xC4)
- {
- "R10G10B10A2_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0xC5)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xC6)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R8G8B8A8_UNORM (0xC7)
- {
- "R8G8B8A8_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8B8A8_UNORM_SRGB (0xC8)
- {
- "R8G8B8A8_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8B8A8_SNORM (0xC9)
- {
- "R8G8B8A8_SNORM",
- {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8B8A8_SINT (0xCA)
- {
- "R8G8B8A8_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8B8A8_UINT (0xCB)
- {
- "R8G8B8A8_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16_UNORM (0xCC)
- {
- "R16G16_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {16, 16, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, false, false}, // Is normalized?
- {1.0f / 65535.0f, 1.0f / 65535.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16_SNORM (0xCD)
- {
- "R16G16_SNORM",
- {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {16, 16, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, false, false}, // Is normalized?
- {1.0f / 32767.0f, 1.0f / 32767.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16_SINT (0xCE)
- {
- "R16G16_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {16, 16, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16_UINT (0xCF)
- {
- "R16G16_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {16, 16, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16_FLOAT (0xD0)
- {
- "R16G16_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {16, 16, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B10G10R10A2_UNORM (0xD1)
- {
- "B10G10R10A2_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B10G10R10A2_UNORM_SRGB (0xD2)
- {
- "B10G10R10A2_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R11G11B10_FLOAT (0xD3)
- {
- "R11G11B10_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {11, 11, 10, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0xD4)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
-
- // R10G10B10_FLOAT_A2_UNORM (0xD5)
- {
- "R10G10B10_FLOAT_A2_UNORM",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f / 3.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32_SINT (0xD6)
- {
- "R32_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {32, 0, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32_UINT (0xD7)
- {
- "R32_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {32, 0, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32_FLOAT (0xD8)
- {
- "R32_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {32, 0, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R24_UNORM_X8_TYPELESS (0xD9)
- {
- "R24_UNORM_X8_TYPELESS",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {24, 0, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 16777215.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // X24_TYPELESS_G8_UINT (0xDA)
- {
- "X24_TYPELESS_G8_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {1, 0, 0, 0}, // Swizzle
- {32, 0, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0xDB)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xDC)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // L32_UNORM (0xDD)
- {
- "L32_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {32, 0, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 4294967295.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0xDE)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // L16A16_UNORM (0xDF)
- {
- "L16A16_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 3, 0, 0}, // Swizzle
- {16, 16, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {true, true, false, false}, // Is normalized?
- {1.0f / 65535.0f, 1.0f / 65535.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // I24X8_UNORM (0xE0)
- {
- "I24X8_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 3, 0, 0}, // Swizzle
- {24, 8, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {true, true, false, false}, // Is normalized?
- {1.0f / 16777215.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // L24X8_UNORM (0xE1)
- {
- "L24X8_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 3, 0, 0}, // Swizzle
- {24, 8, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {true, true, false, false}, // Is normalized?
- {1.0f / 16777215.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0xE2)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // I32_FLOAT (0xE3)
- {
- "I32_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {32, 0, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // L32_FLOAT (0xE4)
- {
- "L32_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {32, 0, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // A32_FLOAT (0xE5)
- {
- "A32_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {3, 0, 0, 0}, // Swizzle
- {32, 0, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0xE6)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xE7)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xE8)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // B8G8R8X8_UNORM (0xE9)
- {
- "B8G8R8X8_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B8G8R8X8_UNORM_SRGB (0xEA)
- {
- "B8G8R8X8_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8B8X8_UNORM (0xEB)
- {
- "R8G8B8X8_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8B8X8_UNORM_SRGB (0xEC)
- {
- "R8G8B8X8_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R9G9B9E5_SHAREDEXP (0xED)
- {
- "R9G9B9E5_SHAREDEXP",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {9, 9, 9, 5}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B10G10R10X2_UNORM (0xEE)
- {
- "B10G10R10X2_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0xEF)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // L16A16_FLOAT (0xF0)
- {
- "L16A16_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 3, 0, 0}, // Swizzle
- {16, 16, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0xF1)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xF2)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R10G10B10X2_USCALED (0xF3)
- {
- "R10G10B10X2_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNUSED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8B8A8_SSCALED (0xF4)
- {
- "R8G8B8A8_SSCALED",
- {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8B8A8_USCALED (0xF5)
- {
- "R8G8B8A8_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16_SSCALED (0xF6)
- {
- "R16G16_SSCALED",
- {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {16, 16, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16_USCALED (0xF7)
- {
- "R16G16_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {16, 16, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32_SSCALED (0xF8)
- {
- "R32_SSCALED",
- {SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {32, 0, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32_USCALED (0xF9)
- {
- "R32_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {32, 0, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0xFA)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xFB)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xFC)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xFD)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xFE)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0xFF)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // B5G6R5_UNORM (0x100)
- {
- "B5G6R5_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 0}, // Swizzle
- {5, 6, 5, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 31.0f, 1.0f / 63.0f, 1.0f / 31.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B5G6R5_UNORM_SRGB (0x101)
- {
- "B5G6R5_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 0}, // Swizzle
- {5, 6, 5, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 3, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 31.0f, 1.0f / 63.0f, 1.0f / 31.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B5G5R5A1_UNORM (0x102)
- {
- "B5G5R5A1_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {5, 5, 5, 1}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B5G5R5A1_UNORM_SRGB (0x103)
- {
- "B5G5R5A1_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {5, 5, 5, 1}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B4G4R4A4_UNORM (0x104)
- {
- "B4G4R4A4_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {4, 4, 4, 4}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B4G4R4A4_UNORM_SRGB (0x105)
- {
- "B4G4R4A4_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {4, 4, 4, 4}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8_UNORM (0x106)
- {
- "R8G8_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {8, 8, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, false, false}, // Is normalized?
- {1.0f / 255.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8_SNORM (0x107)
- {
- "R8G8_SNORM",
- {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {8, 8, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, false, false}, // Is normalized?
- {1.0f / 127.0f, 1.0f / 127.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8_SINT (0x108)
- {
- "R8G8_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {8, 8, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8_UINT (0x109)
- {
- "R8G8_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {8, 8, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16_UNORM (0x10A)
- {
- "R16_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {16, 0, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 65535.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16_SNORM (0x10B)
- {
- "R16_SNORM",
- {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {16, 0, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 32767.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16_SINT (0x10C)
- {
- "R16_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {16, 0, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16_UINT (0x10D)
- {
- "R16_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {16, 0, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16_FLOAT (0x10E)
- {
- "R16_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {16, 0, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x10F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x110)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // I16_UNORM (0x111)
- {
- "I16_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {16, 0, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 65535.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // L16_UNORM (0x112)
- {
- "L16_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {16, 0, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 65535.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // A16_UNORM (0x113)
- {
- "A16_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {3, 0, 0, 0}, // Swizzle
- {16, 0, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 65535.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // L8A8_UNORM (0x114)
- {
- "L8A8_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 3, 0, 0}, // Swizzle
- {8, 8, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {true, true, false, false}, // Is normalized?
- {1.0f / 255.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // I16_FLOAT (0x115)
- {
- "I16_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {16, 0, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // L16_FLOAT (0x116)
- {
- "L16_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {16, 0, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // A16_FLOAT (0x117)
- {
- "A16_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {3, 0, 0, 0}, // Swizzle
- {16, 0, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // L8A8_UNORM_SRGB (0x118)
- {
- "L8A8_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 3, 0, 0}, // Swizzle
- {8, 8, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {true, true, false, false}, // Is normalized?
- {1.0f / 255.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x119)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // B5G5R5X1_UNORM (0x11A)
- {
- "B5G5R5X1_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {5, 5, 5, 1}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B5G5R5X1_UNORM_SRGB (0x11B)
- {
- "B5G5R5X1_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {5, 5, 5, 1}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8_SSCALED (0x11C)
- {
- "R8G8_SSCALED",
- {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {8, 8, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8_USCALED (0x11D)
- {
- "R8G8_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 0, 0}, // Swizzle
- {8, 8, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16_SSCALED (0x11E)
- {
- "R16_SSCALED",
- {SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {16, 0, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16_USCALED (0x11F)
- {
- "R16_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {16, 0, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x120)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x121)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x122)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x123)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // A1B5G5R5_UNORM (0x124)
- {
- "A1B5G5R5_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {3, 2, 1, 0}, // Swizzle
- {1, 5, 5, 5}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 1.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // A4B4G4R4_UNORM (0x125)
- {
- "A4B4G4R4_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {3, 2, 1, 0}, // Swizzle
- {4, 4, 4, 4}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // L8A8_UINT (0x126)
- {
- "L8A8_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 3, 0, 0}, // Swizzle
- {8, 8, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // L8A8_SINT (0x127)
- {
- "L8A8_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 3, 0, 0}, // Swizzle
- {8, 8, 0, 0}, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x128)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x129)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x12A)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x12B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x12C)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x12D)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x12E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x12F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x130)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x131)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x132)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x133)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x134)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x135)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x136)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x137)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x138)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x139)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x13A)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x13B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x13C)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x13D)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x13E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x13F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R8_UNORM (0x140)
- {
- "R8_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8_SNORM (0x141)
- {
- "R8_SNORM",
- {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 127.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8_SINT (0x142)
- {
- "R8_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8_UINT (0x143)
- {
- "R8_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // A8_UNORM (0x144)
- {
- "A8_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {3, 0, 0, 0}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // I8_UNORM (0x145)
- {
- "I8_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // L8_UNORM (0x146)
- {
- "L8_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x147)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x148)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R8_SSCALED (0x149)
- {
- "R8_SSCALED",
- {SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8_USCALED (0x14A)
- {
- "R8_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x14B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // L8_UNORM_SRGB (0x14C)
- {
- "L8_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x14D)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x14E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x14F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x150)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x151)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // L8_UINT (0x152)
- {
- "L8_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // L8_SINT (0x153)
- {
- "L8_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // I8_UINT (0x154)
- {
- "I8_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // I8_SINT (0x155)
- {
- "I8_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x156)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x157)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x158)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x159)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x15A)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x15B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x15C)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x15D)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x15E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x15F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x160)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x161)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x162)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x163)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x164)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x165)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x166)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x167)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x168)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x169)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x16A)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x16B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x16C)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x16D)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x16E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x16F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x170)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x171)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x172)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x173)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x174)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x175)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x176)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x177)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x178)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x179)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x17A)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x17B)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x17C)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x17D)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x17E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x17F)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // DXT1_RGB_SRGB (0x180)
- {
- "DXT1_RGB_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // padding (0x181)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x182)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // YCRCB_SWAPUVY (0x183)
- {
- "YCRCB_SWAPUVY",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- true, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 2, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x184)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x185)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // BC1_UNORM (0x186)
- {
- "BC1_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // BC2_UNORM (0x187)
- {
- "BC2_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // BC3_UNORM (0x188)
- {
- "BC3_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // BC4_UNORM (0x189)
- {
- "BC4_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // BC5_UNORM (0x18A)
- {
- "BC5_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // BC1_UNORM_SRGB (0x18B)
- {
- "BC1_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 1, // Num components
- true, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // BC2_UNORM_SRGB (0x18C)
- {
- "BC2_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- true, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // BC3_UNORM_SRGB (0x18D)
- {
- "BC3_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- true, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // padding (0x18E)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // YCRCB_SWAPUV (0x18F)
- {
- "YCRCB_SWAPUV",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- true, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 2, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x190)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // DXT1_RGB (0x191)
- {
- "DXT1_RGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // padding (0x192)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R8G8B8_UNORM (0x193)
- {
- "R8G8B8_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {8, 8, 8, 0}, // Bits per component
- 24, // Bits per element
- 3, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8B8_SNORM (0x194)
- {
- "R8G8B8_SNORM",
- {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {8, 8, 8, 0}, // Bits per component
- 24, // Bits per element
- 3, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8B8_SSCALED (0x195)
- {
- "R8G8B8_SSCALED",
- {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {8, 8, 8, 0}, // Bits per component
- 24, // Bits per element
- 3, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8B8_USCALED (0x196)
- {
- "R8G8B8_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {8, 8, 8, 0}, // Bits per component
- 24, // Bits per element
- 3, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R64G64B64A64_FLOAT (0x197)
- {
- "R64G64B64A64_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {64, 64, 64, 64}, // Bits per component
- 256, // Bits per element
- 32, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R64G64B64_FLOAT (0x198)
- {
- "R64G64B64_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {64, 64, 64, 0}, // Bits per component
- 192, // Bits per element
- 24, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // BC4_SNORM (0x199)
- {
- "BC4_SNORM",
- {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 127.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // BC5_SNORM (0x19A)
- {
- "BC5_SNORM",
- {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 127.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // R16G16B16_FLOAT (0x19B)
- {
- "R16G16B16_FLOAT",
- {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {16, 16, 16, 0}, // Bits per component
- 48, // Bits per element
- 6, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16B16_UNORM (0x19C)
- {
- "R16G16B16_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {16, 16, 16, 0}, // Bits per component
- 48, // Bits per element
- 6, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / 65535.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16B16_SNORM (0x19D)
- {
- "R16G16B16_SNORM",
- {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {16, 16, 16, 0}, // Bits per component
- 48, // Bits per element
- 6, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 32767.0f, 1.0f / 32767.0f, 1.0f / 32767.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16B16_SSCALED (0x19E)
- {
- "R16G16B16_SSCALED",
- {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {16, 16, 16, 0}, // Bits per component
- 48, // Bits per element
- 6, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16B16_USCALED (0x19F)
- {
- "R16G16B16_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {16, 16, 16, 0}, // Bits per component
- 48, // Bits per element
- 6, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x1A0)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // BC6H_SF16 (0x1A1)
- {
- "BC6H_SF16",
- {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 127.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // BC7_UNORM (0x1A2)
- {
- "BC7_UNORM",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // BC7_UNORM_SRGB (0x1A3)
- {
- "BC7_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- true, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // BC6H_UF16 (0x1A4)
- {
- "BC6H_UF16",
- {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 8, 8, 8}, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, false, false, false}, // Is normalized?
- {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
- },
-
- // padding (0x1A5)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1A6)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1A7)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R8G8B8_UNORM_SRGB (0x1A8)
- {
- "R8G8B8_UNORM_SRGB",
- {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {8, 8, 8, 0}, // Bits per component
- 24, // Bits per element
- 3, // Bytes per element
- 3, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, false}, // Is normalized?
- {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x1A9)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1AA)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1AB)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1AC)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1AD)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1AE)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1AF)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R16G16B16_UINT (0x1B0)
- {
- "R16G16B16_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {16, 16, 16, 0}, // Bits per component
- 48, // Bits per element
- 6, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R16G16B16_SINT (0x1B1)
- {
- "R16G16B16_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {16, 16, 16, 0}, // Bits per component
- 48, // Bits per element
- 6, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R32_SFIXED (0x1B2)
- {
- "R32_SFIXED",
- {SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 0, 0, 0}, // Swizzle
- {32, 0, 0, 0}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R10G10B10A2_SNORM (0x1B3)
- {
- "R10G10B10A2_SNORM",
- {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R10G10B10A2_USCALED (0x1B4)
- {
- "R10G10B10A2_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R10G10B10A2_SSCALED (0x1B5)
- {
- "R10G10B10A2_SSCALED",
- {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R10G10B10A2_SINT (0x1B6)
- {
- "R10G10B10A2_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B10G10R10A2_SNORM (0x1B7)
- {
- "B10G10R10A2_SNORM",
- {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {true, true, true, true}, // Is normalized?
- {1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B10G10R10A2_USCALED (0x1B8)
- {
- "B10G10R10A2_USCALED",
- {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B10G10R10A2_SSCALED (0x1B9)
- {
- "B10G10R10A2_SSCALED",
- {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
- {0, 0, 0, 0x3f800000}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B10G10R10A2_UINT (0x1BA)
- {
- "B10G10R10A2_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // B10G10R10A2_SINT (0x1BB)
- {
- "B10G10R10A2_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {2, 1, 0, 3}, // Swizzle
- {10, 10, 10, 2}, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x1BC)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1BD)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1BE)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1BF)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1C0)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1C1)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1C2)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1C3)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1C4)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1C5)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1C6)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1C7)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // R8G8B8_UINT (0x1C8)
- {
- "R8G8B8_UINT",
- {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {8, 8, 8, 0}, // Bits per component
- 24, // Bits per element
- 3, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // R8G8B8_SINT (0x1C9)
- {
- "R8G8B8_SINT",
- {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 0}, // Swizzle
- {8, 8, 8, 0}, // Bits per component
- 24, // Bits per element
- 3, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-
- // padding (0x1CA)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1CB)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1CC)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1CD)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1CE)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1CF)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1D0)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1D1)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1D2)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1D3)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1D4)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1D5)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1D6)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1D7)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1D8)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1D9)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1DA)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1DB)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1DC)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1DD)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1DE)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1DF)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1E0)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1E1)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1E2)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1E3)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1E4)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1E5)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1E6)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1E7)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1E8)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1E9)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1EA)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1EB)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1EC)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1ED)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1EE)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1EF)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1F0)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1F1)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1F2)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1F3)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1F4)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1F5)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1F6)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1F7)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1F8)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1F9)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1FA)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1FB)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1FC)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1FD)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // padding (0x1FE)
- {nullptr,
- {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- {0, 0, 0, 0},
- 0,
- 0,
- 0,
- false,
- false,
- false,
- false,
- {false, false, false, false},
- {0.0f, 0.0f, 0.0f, 0.0f},
- 1,
- 1},
- // RAW (0x1FF)
- {
- "RAW",
- {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
- {0, 0, 0, 0x1}, // Defaults for missing components
- {0, 1, 2, 3}, // Swizzle
- {8, 0, 0, 0}, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- {false, false, false, false}, // Is normalized?
- {1.0f, 0, 0, 0}, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
- },
-};
diff --git a/src/gallium/drivers/swr/rasterizer/common/formats.h b/src/gallium/drivers/swr/rasterizer/common/formats.h
deleted file mode 100644
index b7a3e533d15..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/formats.h
+++ /dev/null
@@ -1,268 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file formats.h
- *
- * @brief auto-generated file
- *
- * DO NOT EDIT
- *
- ******************************************************************************/
-
-#pragma once
-
-#include "common/os.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_TYPE - Format component type
-//////////////////////////////////////////////////////////////////////////
-enum SWR_TYPE
-{
- SWR_TYPE_UNKNOWN,
- SWR_TYPE_UNUSED,
- SWR_TYPE_UNORM,
- SWR_TYPE_SNORM,
- SWR_TYPE_UINT,
- SWR_TYPE_SINT,
- SWR_TYPE_FLOAT,
- SWR_TYPE_SSCALED,
- SWR_TYPE_USCALED,
- SWR_TYPE_SFIXED,
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_FORMAT
-//////////////////////////////////////////////////////////////////////////
-enum SWR_FORMAT
-{
- R32G32B32A32_FLOAT = 0x0,
- R32G32B32A32_SINT = 0x1,
- R32G32B32A32_UINT = 0x2,
- R64G64_FLOAT = 0x5,
- R32G32B32X32_FLOAT = 0x6,
- R32G32B32A32_SSCALED = 0x7,
- R32G32B32A32_USCALED = 0x8,
- R32G32B32A32_SFIXED = 0x20,
- R32G32B32_FLOAT = 0x40,
- R32G32B32_SINT = 0x41,
- R32G32B32_UINT = 0x42,
- R32G32B32_SSCALED = 0x45,
- R32G32B32_USCALED = 0x46,
- R32G32B32_SFIXED = 0x50,
- R16G16B16A16_UNORM = 0x80,
- R16G16B16A16_SNORM = 0x81,
- R16G16B16A16_SINT = 0x82,
- R16G16B16A16_UINT = 0x83,
- R16G16B16A16_FLOAT = 0x84,
- R32G32_FLOAT = 0x85,
- R32G32_SINT = 0x86,
- R32G32_UINT = 0x87,
- R32_FLOAT_X8X24_TYPELESS = 0x88,
- X32_TYPELESS_G8X24_UINT = 0x89,
- L32A32_FLOAT = 0x8A,
- R64_FLOAT = 0x8D,
- R16G16B16X16_UNORM = 0x8E,
- R16G16B16X16_FLOAT = 0x8F,
- L32X32_FLOAT = 0x91,
- I32X32_FLOAT = 0x92,
- R16G16B16A16_SSCALED = 0x93,
- R16G16B16A16_USCALED = 0x94,
- R32G32_SSCALED = 0x95,
- R32G32_USCALED = 0x96,
- R32G32_SFIXED = 0xA0,
- B8G8R8A8_UNORM = 0xC0,
- B8G8R8A8_UNORM_SRGB = 0xC1,
- R10G10B10A2_UNORM = 0xC2,
- R10G10B10A2_UNORM_SRGB = 0xC3,
- R10G10B10A2_UINT = 0xC4,
- R8G8B8A8_UNORM = 0xC7,
- R8G8B8A8_UNORM_SRGB = 0xC8,
- R8G8B8A8_SNORM = 0xC9,
- R8G8B8A8_SINT = 0xCA,
- R8G8B8A8_UINT = 0xCB,
- R16G16_UNORM = 0xCC,
- R16G16_SNORM = 0xCD,
- R16G16_SINT = 0xCE,
- R16G16_UINT = 0xCF,
- R16G16_FLOAT = 0xD0,
- B10G10R10A2_UNORM = 0xD1,
- B10G10R10A2_UNORM_SRGB = 0xD2,
- R11G11B10_FLOAT = 0xD3,
- R10G10B10_FLOAT_A2_UNORM = 0xD5,
- R32_SINT = 0xD6,
- R32_UINT = 0xD7,
- R32_FLOAT = 0xD8,
- R24_UNORM_X8_TYPELESS = 0xD9,
- X24_TYPELESS_G8_UINT = 0xDA,
- L32_UNORM = 0xDD,
- L16A16_UNORM = 0xDF,
- I24X8_UNORM = 0xE0,
- L24X8_UNORM = 0xE1,
- I32_FLOAT = 0xE3,
- L32_FLOAT = 0xE4,
- A32_FLOAT = 0xE5,
- B8G8R8X8_UNORM = 0xE9,
- B8G8R8X8_UNORM_SRGB = 0xEA,
- R8G8B8X8_UNORM = 0xEB,
- R8G8B8X8_UNORM_SRGB = 0xEC,
- R9G9B9E5_SHAREDEXP = 0xED,
- B10G10R10X2_UNORM = 0xEE,
- L16A16_FLOAT = 0xF0,
- R10G10B10X2_USCALED = 0xF3,
- R8G8B8A8_SSCALED = 0xF4,
- R8G8B8A8_USCALED = 0xF5,
- R16G16_SSCALED = 0xF6,
- R16G16_USCALED = 0xF7,
- R32_SSCALED = 0xF8,
- R32_USCALED = 0xF9,
- B5G6R5_UNORM = 0x100,
- B5G6R5_UNORM_SRGB = 0x101,
- B5G5R5A1_UNORM = 0x102,
- B5G5R5A1_UNORM_SRGB = 0x103,
- B4G4R4A4_UNORM = 0x104,
- B4G4R4A4_UNORM_SRGB = 0x105,
- R8G8_UNORM = 0x106,
- R8G8_SNORM = 0x107,
- R8G8_SINT = 0x108,
- R8G8_UINT = 0x109,
- R16_UNORM = 0x10A,
- R16_SNORM = 0x10B,
- R16_SINT = 0x10C,
- R16_UINT = 0x10D,
- R16_FLOAT = 0x10E,
- I16_UNORM = 0x111,
- L16_UNORM = 0x112,
- A16_UNORM = 0x113,
- L8A8_UNORM = 0x114,
- I16_FLOAT = 0x115,
- L16_FLOAT = 0x116,
- A16_FLOAT = 0x117,
- L8A8_UNORM_SRGB = 0x118,
- B5G5R5X1_UNORM = 0x11A,
- B5G5R5X1_UNORM_SRGB = 0x11B,
- R8G8_SSCALED = 0x11C,
- R8G8_USCALED = 0x11D,
- R16_SSCALED = 0x11E,
- R16_USCALED = 0x11F,
- A1B5G5R5_UNORM = 0x124,
- A4B4G4R4_UNORM = 0x125,
- L8A8_UINT = 0x126,
- L8A8_SINT = 0x127,
- R8_UNORM = 0x140,
- R8_SNORM = 0x141,
- R8_SINT = 0x142,
- R8_UINT = 0x143,
- A8_UNORM = 0x144,
- I8_UNORM = 0x145,
- L8_UNORM = 0x146,
- R8_SSCALED = 0x149,
- R8_USCALED = 0x14A,
- L8_UNORM_SRGB = 0x14C,
- L8_UINT = 0x152,
- L8_SINT = 0x153,
- I8_UINT = 0x154,
- I8_SINT = 0x155,
- DXT1_RGB_SRGB = 0x180,
- YCRCB_SWAPUVY = 0x183,
- BC1_UNORM = 0x186,
- BC2_UNORM = 0x187,
- BC3_UNORM = 0x188,
- BC4_UNORM = 0x189,
- BC5_UNORM = 0x18A,
- BC1_UNORM_SRGB = 0x18B,
- BC2_UNORM_SRGB = 0x18C,
- BC3_UNORM_SRGB = 0x18D,
- YCRCB_SWAPUV = 0x18F,
- DXT1_RGB = 0x191,
- R8G8B8_UNORM = 0x193,
- R8G8B8_SNORM = 0x194,
- R8G8B8_SSCALED = 0x195,
- R8G8B8_USCALED = 0x196,
- R64G64B64A64_FLOAT = 0x197,
- R64G64B64_FLOAT = 0x198,
- BC4_SNORM = 0x199,
- BC5_SNORM = 0x19A,
- R16G16B16_FLOAT = 0x19B,
- R16G16B16_UNORM = 0x19C,
- R16G16B16_SNORM = 0x19D,
- R16G16B16_SSCALED = 0x19E,
- R16G16B16_USCALED = 0x19F,
- BC6H_SF16 = 0x1A1,
- BC7_UNORM = 0x1A2,
- BC7_UNORM_SRGB = 0x1A3,
- BC6H_UF16 = 0x1A4,
- R8G8B8_UNORM_SRGB = 0x1A8,
- R16G16B16_UINT = 0x1B0,
- R16G16B16_SINT = 0x1B1,
- R32_SFIXED = 0x1B2,
- R10G10B10A2_SNORM = 0x1B3,
- R10G10B10A2_USCALED = 0x1B4,
- R10G10B10A2_SSCALED = 0x1B5,
- R10G10B10A2_SINT = 0x1B6,
- B10G10R10A2_SNORM = 0x1B7,
- B10G10R10A2_USCALED = 0x1B8,
- B10G10R10A2_SSCALED = 0x1B9,
- B10G10R10A2_UINT = 0x1BA,
- B10G10R10A2_SINT = 0x1BB,
- R8G8B8_UINT = 0x1C8,
- R8G8B8_SINT = 0x1C9,
- RAW = 0x1FF,
- NUM_SWR_FORMATS = 0x200,
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_FORMAT_INFO - Format information
-//////////////////////////////////////////////////////////////////////////
-struct SWR_FORMAT_INFO
-{
- const char* name;
- SWR_TYPE type[4];
- uint32_t defaults[4];
- uint32_t swizzle[4]; ///< swizzle per component
- uint32_t bpc[4]; ///< bits per component
- uint32_t bpp; ///< bits per pixel
- uint32_t Bpp; ///< bytes per pixel
- uint32_t numComps; ///< number of components
- bool isSRGB;
- bool isBC;
- bool isSubsampled;
- bool isLuminance;
- bool isNormalized[4];
- float toFloat[4];
- uint32_t bcWidth;
- uint32_t bcHeight;
-};
-
-extern const SWR_FORMAT_INFO gFormatInfo[NUM_SWR_FORMATS];
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Retrieves format info struct for given format.
-/// @param format - SWR format
-INLINE const SWR_FORMAT_INFO& GetFormatInfo(SWR_FORMAT format)
-{
- SWR_ASSERT(format < NUM_SWR_FORMATS, "Invalid Surface Format: %d", format);
- SWR_ASSERT(gFormatInfo[format].name != nullptr, "Invalid Surface Format: %d", format);
- return gFormatInfo[format];
-}
-
-// lookup table for unorm8 srgb -> float conversion
-extern const uint32_t srgb8Table[256];
diff --git a/src/gallium/drivers/swr/rasterizer/common/intrin.h b/src/gallium/drivers/swr/rasterizer/common/intrin.h
deleted file mode 100644
index 95b462b1e36..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/intrin.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-
-#ifndef __SWR_INTRIN_H__
-#define __SWR_INTRIN_H__
-
-#include "os.h"
-
-#if !defined(SIMD_ARCH)
-#define SIMD_ARCH KNOB_ARCH
-#endif
-
-#include "simdlib_types.hpp"
-
-// Short aliases for the SIMD128Impl types (simd4* names).
-typedef SIMDImpl::SIMD128Impl::Float simd4scalar;
-typedef SIMDImpl::SIMD128Impl::Double simd4scalard;
-typedef SIMDImpl::SIMD128Impl::Integer simd4scalari;
-typedef SIMDImpl::SIMD128Impl::Vec4 simd4vector;
-typedef SIMDImpl::SIMD128Impl::Mask simd4mask;
-
-// Short aliases for the SIMD256Impl types (simd8* names).
-typedef SIMDImpl::SIMD256Impl::Float simd8scalar;
-typedef SIMDImpl::SIMD256Impl::Double simd8scalard;
-typedef SIMDImpl::SIMD256Impl::Integer simd8scalari;
-typedef SIMDImpl::SIMD256Impl::Vec4 simd8vector;
-typedef SIMDImpl::SIMD256Impl::Mask simd8mask;
-
-// Short aliases for the SIMD512Impl types (simd16* names).
-typedef SIMDImpl::SIMD512Impl::Float simd16scalar;
-typedef SIMDImpl::SIMD512Impl::Double simd16scalard;
-typedef SIMDImpl::SIMD512Impl::Integer simd16scalari;
-typedef SIMDImpl::SIMD512Impl::Vec4 simd16vector;
-typedef SIMDImpl::SIMD512Impl::Mask simd16mask;
-
-// The unsuffixed simd* names follow KNOB_SIMD_WIDTH; 8 is the only width
-// accepted here, anything else is a compile error.
-#if KNOB_SIMD_WIDTH == 8
-typedef simd8scalar simdscalar;
-typedef simd8scalard simdscalard;
-typedef simd8scalari simdscalari;
-typedef simd8vector simdvector;
-typedef simd8mask simdmask;
-#else
-#error Unsupported vector width
-#endif
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Parallel bit deposit: scatters the low-order bits of a into the
-///        positions of the set bits of mask, lowest mask bit first.
-/// @param a    - source bits, consumed starting from bit 0
-/// @param mask - selects the destination bit positions
-/// @return value whose bits outside mask are zero
-INLINE
-UINT pdep_u32(UINT a, UINT mask)
-{
-#if KNOB_ARCH >= KNOB_ARCH_AVX2
-    return _pdep_u32(a, mask);
-#else
-    UINT result = 0;
-
-    // copied from http://wm.ite.pl/articles/pdep-soft-emu.html
-    // using bsf instead of funky loop
-    unsigned long maskIndex = 0;
-    while (_BitScanForward(&maskIndex, mask))
-    {
-        // 1. isolate lowest set bit of mask
-        //    (unsigned shift: a signed 1 << 31 would overflow int)
-        const UINT lowest = UINT(1) << maskIndex;
-
-        // 2. populate LSB from src: all ones when bit 0 of a is set, else zero.
-        //    Unsigned negation avoids relying on an arithmetic right shift of a
-        //    negative int, which is implementation-defined.
-        const UINT LSB = UINT(0) - (a & UINT(1));
-
-        // 3. copy bit from mask
-        result |= LSB & lowest;
-
-        // 4. clear lowest bit
-        mask &= ~lowest;
-
-        // 5. prepare for next iteration
-        a >>= 1;
-    }
-
-    return result;
-#endif
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Parallel bit extract: gathers the bits of a found at the set bit
-///        positions of mask and packs them into the low-order bits of the
-///        result, lowest mask bit first.
-/// @param a    - source value
-/// @param mask - selects which bits of a are extracted
-/// @return the extracted bits, packed from bit 0 upwards
-INLINE
-UINT pext_u32(UINT a, UINT mask)
-{
-#if KNOB_ARCH >= KNOB_ARCH_AVX2
-    return _pext_u32(a, mask);
-#else
-    UINT          result     = 0;
-    unsigned long maskIndex  = 0;
-    uint32_t      currentBit = 0;
-    while (_BitScanForward(&maskIndex, mask))
-    {
-        // 1. isolate lowest set bit of mask
-        //    (unsigned shift: a signed 1 << 31 would overflow int)
-        const UINT lowest = UINT(1) << maskIndex;
-
-        // 2. copy the selected bit of a into the next free result bit; the
-        //    operand is widened to UINT first so the shift stays unsigned
-        result |= UINT((a & lowest) != 0) << currentBit;
-        ++currentBit;
-
-        // 3. clear lowest bit
-        mask &= ~lowest;
-    }
-    return result;
-#endif
-}
-
-#endif //__SWR_INTRIN_H__
diff --git a/src/gallium/drivers/swr/rasterizer/common/isa.hpp b/src/gallium/drivers/swr/rasterizer/common/isa.hpp
deleted file mode 100644
index 41af0055f1e..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/isa.hpp
+++ /dev/null
@@ -1,231 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-
-#pragma once
-
-#include <iostream>
-#include <vector>
-#include <bitset>
-#include <array>
-#include <string>
-#include <algorithm>
-
-// Clang for Windows does supply an intrin.h with __cpuid intrinsics, however...
-// It seems to not realize that a write to "b" (ebx) will kill the value in rbx.
-// This attempts to use the "native" clang / gcc intrinsics instead of the windows
-// compatible ones.
-#if defined(_MSC_VER) && !defined(__clang__)
-#include <intrin.h>
-#else
-#include <string.h>
-#if !defined(__cpuid)
-#include <cpuid.h>
-#endif
-#endif
-
-/// Snapshot of the CPUID feature flags of the executing CPU. All CPUID queries
-/// run once in the constructor of the internal CPU_Rep member; each getter
-/// only tests a bit of the cached registers.
-class InstructionSet
-{
-public:
-    InstructionSet() : CPU_Rep(){};
-
-    // getters
-    std::string Vendor(void) { return CPU_Rep.vendor_; }
-    std::string Brand(void) { return CPU_Rep.brand_; }
-
-    // CPUID function 1, ECX
-    bool SSE3(void) { return CPU_Rep.f_1_ECX_[0]; }
-    bool PCLMULQDQ(void) { return CPU_Rep.f_1_ECX_[1]; }
-    bool MONITOR(void) { return CPU_Rep.f_1_ECX_[3]; }
-    bool SSSE3(void) { return CPU_Rep.f_1_ECX_[9]; }
-    bool FMA(void) { return CPU_Rep.f_1_ECX_[12]; }
-    bool CMPXCHG16B(void) { return CPU_Rep.f_1_ECX_[13]; }
-    bool SSE41(void) { return CPU_Rep.f_1_ECX_[19]; }
-    bool SSE42(void) { return CPU_Rep.f_1_ECX_[20]; }
-    bool MOVBE(void) { return CPU_Rep.f_1_ECX_[22]; }
-    bool POPCNT(void) { return CPU_Rep.f_1_ECX_[23]; }
-    bool AES(void) { return CPU_Rep.f_1_ECX_[25]; }
-    bool XSAVE(void) { return CPU_Rep.f_1_ECX_[26]; }
-    bool OSXSAVE(void) { return CPU_Rep.f_1_ECX_[27]; }
-    bool RDRAND(void) { return CPU_Rep.f_1_ECX_[30]; }
-
-    // CPUID function 1, EDX
-    bool MSR(void) { return CPU_Rep.f_1_EDX_[5]; }
-    bool CX8(void) { return CPU_Rep.f_1_EDX_[8]; }
-    bool SEP(void) { return CPU_Rep.f_1_EDX_[11]; }
-    bool CMOV(void) { return CPU_Rep.f_1_EDX_[15]; }
-    bool CLFSH(void) { return CPU_Rep.f_1_EDX_[19]; }
-    bool MMX(void) { return CPU_Rep.f_1_EDX_[23]; }
-    bool FXSR(void) { return CPU_Rep.f_1_EDX_[24]; }
-    bool SSE(void) { return CPU_Rep.f_1_EDX_[25]; }
-    bool SSE2(void) { return CPU_Rep.f_1_EDX_[26]; }
-
-    // CPUID function 7, EBX
-    bool FSGSBASE(void) { return CPU_Rep.f_7_EBX_[0]; }
-    bool BMI1(void) { return CPU_Rep.f_7_EBX_[3]; }
-    bool HLE(void) { return CPU_Rep.isIntel_ && CPU_Rep.f_7_EBX_[4]; }
-    bool BMI2(void) { return CPU_Rep.f_7_EBX_[8]; }
-    bool ERMS(void) { return CPU_Rep.f_7_EBX_[9]; }
-    bool INVPCID(void) { return CPU_Rep.f_7_EBX_[10]; }
-    bool RTM(void) { return CPU_Rep.isIntel_ && CPU_Rep.f_7_EBX_[11]; }
-    bool RDSEED(void) { return CPU_Rep.f_7_EBX_[18]; }
-    bool ADX(void) { return CPU_Rep.f_7_EBX_[19]; }
-    bool SHA(void) { return CPU_Rep.f_7_EBX_[29]; }
-
-    // CPUID function 7, ECX
-    bool PREFETCHWT1(void) { return CPU_Rep.f_7_ECX_[0]; }
-
-    // CPUID function 0x80000001, ECX (some flags are gated on the vendor)
-    bool LAHF(void) { return CPU_Rep.f_81_ECX_[0]; }
-    bool LZCNT(void) { return CPU_Rep.isIntel_ && CPU_Rep.f_81_ECX_[5]; }
-    bool ABM(void) { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[5]; }
-    bool SSE4a(void) { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[6]; }
-    bool XOP(void) { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[11]; }
-    bool TBM(void) { return CPU_Rep.isAMD_ && CPU_Rep.f_81_ECX_[21]; }
-
-    // CPUID function 0x80000001, EDX (some flags are gated on the vendor)
-    bool SYSCALL(void) { return CPU_Rep.isIntel_ && CPU_Rep.f_81_EDX_[11]; }
-    bool MMXEXT(void) { return CPU_Rep.isAMD_ && CPU_Rep.f_81_EDX_[22]; }
-    bool RDTSCP(void) { return CPU_Rep.isIntel_ && CPU_Rep.f_81_EDX_[27]; }
-    bool _3DNOWEXT(void) { return CPU_Rep.isAMD_ && CPU_Rep.f_81_EDX_[30]; }
-    bool _3DNOW(void) { return CPU_Rep.isAMD_ && CPU_Rep.f_81_EDX_[31]; }
-
-    // Vector extensions (function 1 ECX and function 7 EBX)
-    bool AVX(void) { return CPU_Rep.f_1_ECX_[28]; }
-    bool F16C(void) { return CPU_Rep.f_1_ECX_[29]; }
-    bool AVX2(void) { return CPU_Rep.f_7_EBX_[5]; }
-    bool AVX512F(void) { return CPU_Rep.f_7_EBX_[16]; }
-    bool AVX512PF(void) { return CPU_Rep.f_7_EBX_[26]; }
-    bool AVX512ER(void) { return CPU_Rep.f_7_EBX_[27]; }
-    bool AVX512CD(void) { return CPU_Rep.f_7_EBX_[28]; }
-
-private:
-    /// Runs the CPUID queries and caches the raw registers and decoded strings.
-    class InstructionSet_Internal
-    {
-    public:
-        InstructionSet_Internal() :
-            nIds_{0}, nExIds_{0}, isIntel_{false}, isAMD_{false}, f_1_ECX_{0}, f_1_EDX_{0},
-            f_7_EBX_{0}, f_7_ECX_{0}, f_81_ECX_{0}, f_81_EDX_{0}, data_{}, extdata_{}
-        {
-            // Registers of one CPUID call in the order EAX, EBX, ECX, EDX.
-            // Value-initialized so no indeterminate values are ever stored.
-            std::array<int, 4> cpui{};
-
-            // Calling __cpuid with 0x0 as the function_id argument
-            // gets the number of the highest valid function ID.
-#if defined(_MSC_VER) && !defined(__clang__)
-            __cpuid(cpui.data(), 0);
-            nIds_ = cpui[0];
-#else
-            nIds_ = __get_cpuid_max(0, NULL);
-#endif
-
-            for (int i = 0; i <= nIds_; ++i)
-            {
-#if defined(_MSC_VER) && !defined(__clang__)
-                __cpuidex(cpui.data(), i, 0);
-#else
-                int* data = cpui.data();
-                __cpuid_count(i, 0, data[0], data[1], data[2], data[3]);
-#endif
-                data_.push_back(cpui);
-            }
-
-            // Capture vendor string: the 12 characters are spread over EBX, EDX
-            // and ECX of function 0, in that order. memcpy is used instead of
-            // writing through a cast int pointer, which would violate the
-            // aliasing rules and assume int alignment of the char buffer.
-            char vendor[0x20];
-            memset(vendor, 0, sizeof(vendor));
-            memcpy(vendor, &data_[0][1], sizeof(int));
-            memcpy(vendor + 4, &data_[0][3], sizeof(int));
-            memcpy(vendor + 8, &data_[0][2], sizeof(int));
-            vendor_ = vendor;
-            if (vendor_ == "GenuineIntel")
-            {
-                isIntel_ = true;
-            }
-            else if (vendor_ == "AuthenticAMD")
-            {
-                isAMD_ = true;
-            }
-
-            // load bitset with flags for function 0x00000001
-            if (nIds_ >= 1)
-            {
-                f_1_ECX_ = data_[1][2];
-                f_1_EDX_ = data_[1][3];
-            }
-
-            // load bitset with flags for function 0x00000007
-            if (nIds_ >= 7)
-            {
-                f_7_EBX_ = data_[7][1];
-                f_7_ECX_ = data_[7][2];
-            }
-
-            // Calling __cpuid with 0x80000000 as the function_id argument
-            // gets the number of the highest valid extended ID.
-#if defined(_MSC_VER) && !defined(__clang__)
-            __cpuid(cpui.data(), 0x80000000);
-            nExIds_ = cpui[0];
-#else
-            nExIds_ = __get_cpuid_max(0x80000000, NULL);
-#endif
-
-            char brand[0x40];
-            memset(brand, 0, sizeof(brand));
-
-            // extdata_[k] holds the registers of function 0x80000000 + k
-            for (unsigned i = 0x80000000; i <= nExIds_; ++i)
-            {
-#if defined(_MSC_VER) && !defined(__clang__)
-                __cpuidex(cpui.data(), i, 0);
-#else
-                int* data = cpui.data();
-                __cpuid_count(i, 0, data[0], data[1], data[2], data[3]);
-#endif
-                extdata_.push_back(cpui);
-            }
-
-            // load bitset with flags for function 0x80000001
-            if (nExIds_ >= 0x80000001)
-            {
-                f_81_ECX_ = extdata_[1][2];
-                f_81_EDX_ = extdata_[1][3];
-            }
-
-            // Interpret CPU brand string if reported: functions 0x80000002..4
-            // each contribute 16 bytes (all four registers)
-            if (nExIds_ >= 0x80000004)
-            {
-                memcpy(brand, extdata_[2].data(), sizeof(cpui));
-                memcpy(brand + 16, extdata_[3].data(), sizeof(cpui));
-                memcpy(brand + 32, extdata_[4].data(), sizeof(cpui));
-                brand_ = brand;
-            }
-        };
-
-        int                             nIds_;    ///< highest valid standard function ID
-        unsigned                        nExIds_;  ///< highest valid extended function ID
-        std::string                     vendor_;
-        std::string                     brand_;
-        bool                            isIntel_; ///< vendor string is "GenuineIntel"
-        bool                            isAMD_;   ///< vendor string is "AuthenticAMD"
-        std::bitset<32>                 f_1_ECX_;
-        std::bitset<32>                 f_1_EDX_;
-        std::bitset<32>                 f_7_EBX_;
-        std::bitset<32>                 f_7_ECX_;
-        std::bitset<32>                 f_81_ECX_;
-        std::bitset<32>                 f_81_EDX_;
-        std::vector<std::array<int, 4>> data_;    ///< registers of functions 0..nIds_
-        std::vector<std::array<int, 4>> extdata_; ///< registers of functions 0x80000000..nExIds_
-    };
-    const InstructionSet_Internal CPU_Rep;
-};
diff --git a/src/gallium/drivers/swr/rasterizer/common/os.cpp b/src/gallium/drivers/swr/rasterizer/common/os.cpp
deleted file mode 100644
index 75c7161b4e2..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/os.cpp
+++ /dev/null
@@ -1,314 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-
-#include "common/os.h"
-#include <vector>
-#include <array>
-#include <sstream>
-
-#if defined(_WIN32)
-#include <shlobj.h>
-#endif // Windows
-
-#if defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
-#include <pthread.h>
-#endif // Linux
-
-#if defined(_MSC_VER)
-static const DWORD MS_VC_EXCEPTION = 0x406D1388;
-
-#pragma pack(push, 8)
-typedef struct tagTHREADNAME_INFO
-{
- DWORD dwType; // Must be 0x1000.
- LPCSTR szName; // Pointer to name (in user addr space).
- DWORD dwThreadID; // Thread ID (-1=caller thread).
- DWORD dwFlags; // Reserved for future use, must be zero.
-} THREADNAME_INFO;
-#pragma pack(pop)
-
-/// Names the calling thread through the MS_VC_EXCEPTION convention, which is
-/// only interpreted by an attached debugger.
-void LegacySetThreadName(const char* pThreadName)
-{
-    if (!IsDebuggerPresent())
-    {
-        // No debugger attached to interpret exception, no need to actually do it
-        return;
-    }
-
-    THREADNAME_INFO nameInfo;
-    nameInfo.dwType     = 0x1000;
-    nameInfo.szName     = pThreadName;
-    nameInfo.dwThreadID = GetCurrentThreadId();
-    nameInfo.dwFlags    = 0;
-
-    // The payload is handed over as an array of ULONG_PTR-sized arguments.
-    const DWORD argCount = sizeof(nameInfo) / sizeof(ULONG_PTR);
-
-#pragma warning(push)
-#pragma warning(disable : 6320 6322)
-    __try
-    {
-        RaiseException(MS_VC_EXCEPTION, 0, argCount, (ULONG_PTR*)&nameInfo);
-    }
-    __except (EXCEPTION_EXECUTE_HANDLER)
-    {
-    }
-#pragma warning(pop)
-}
-#endif // _MSC_VER
-
-/// Sets the name of the calling thread as shown by debuggers and OS tools.
-/// @param pThreadName - NUL-terminated thread name
-void SWR_API SetCurrentThreadName(const char* pThreadName)
-{
-#if defined(_MSC_VER)
-    // The SetThreadDescription API was brought in version 1607 of Windows 10.
-    typedef HRESULT(WINAPI * PFNSetThreadDescription)(HANDLE hThread, PCWSTR lpThreadDescription);
-    // The SetThreadDescription API works even if no debugger is attached.
-    auto pfnSetThreadDescription = reinterpret_cast<PFNSetThreadDescription>(
-        GetProcAddress(GetModuleHandleA("Kernel32.dll"), "SetThreadDescription"));
-
-    if (!pfnSetThreadDescription)
-    {
-        // try KernelBase.dll
-        pfnSetThreadDescription = reinterpret_cast<PFNSetThreadDescription>(
-            GetProcAddress(GetModuleHandleA("KernelBase.dll"), "SetThreadDescription"));
-    }
-
-    if (pfnSetThreadDescription)
-    {
-        std::string  utf8Name = pThreadName;
-        std::wstring wideName;
-        // one extra element for the terminator written by swprintf_s
-        wideName.resize(utf8Name.size() + 1);
-        swprintf_s(&(wideName.front()), wideName.size(), L"%S", utf8Name.c_str());
-        HRESULT hr = pfnSetThreadDescription(GetCurrentThread(), wideName.c_str());
-        SWR_ASSERT(SUCCEEDED(hr), "Failed to set thread name to %s", pThreadName);
-
-        // Fall through - it seems like some debuggers only recognize the exception
-    }
-
-    // Fall back to exception based hack
-    LegacySetThreadName(pThreadName);
-#endif // _MSC_VER
-
-#if defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
-    // pthread_setname_np() accepts at most 16 bytes including the terminator and
-    // fails with ERANGE (leaving the thread unnamed) for anything longer, so a
-    // long name is truncated instead of being silently dropped.
-    char shortName[16];
-    snprintf(shortName, sizeof(shortName), "%s", pThreadName);
-    pthread_setname_np(pthread_self(), shortName);
-#endif // Linux
-}
-
-#if defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
-/// Splits input at every occurrence of splitToken. out_segments is cleared
-/// first and then receives the non-empty pieces in order of appearance.
-static void
-SplitString(std::vector<std::string>& out_segments, const std::string& input, char splitToken)
-{
-    out_segments.clear();
-
-    std::istringstream stream(input);
-    for (std::string piece; std::getline(stream, piece, splitToken);)
-    {
-        // adjacent, leading or trailing tokens yield empty pieces; drop them
-        if (!piece.empty())
-        {
-            out_segments.push_back(piece);
-        }
-    }
-}
-#endif // Unix
-
-/// Creates the directory named by path together with any missing parent
-/// directories. Directories that already exist are not an error; creation
-/// stops at the first component that cannot be created.
-/// @param path - directory path to create
-void SWR_API CreateDirectoryPath(const std::string& path)
-{
-#if defined(_WIN32)
-    SHCreateDirectoryExA(nullptr, path.c_str(), nullptr);
-#endif // Windows
-
-#if defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
-    std::vector<std::string> pathSegments;
-    SplitString(pathSegments, path, '/');
-
-    // Only an absolute path gets a leading '/'. Prepending it unconditionally
-    // would turn a relative path such as "a/b" into "/a/b".
-    const bool isAbsolute = !path.empty() && path[0] == '/';
-
-    std::string tmpPath;
-    for (auto const& segment : pathSegments)
-    {
-        if (isAbsolute || !tmpPath.empty())
-        {
-            tmpPath.push_back('/');
-        }
-        tmpPath += segment;
-
-        int result = mkdir(tmpPath.c_str(), 0777);
-        if (result == -1 && errno != EEXIST)
-        {
-            break;
-        }
-    }
-#endif // Unix
-}
-
-/// Execute Command (block until finished)
-///
-/// Only the Windows branch is implemented; on every other platform the
-/// function returns -1 without running anything.
-/// @returns process exit value, or -1 if the process could not be started
-int SWR_API ExecCmd(const std::string& cmd, ///< (In) Command line string
-                    const char* pOptEnvStrings, ///< (Optional In) Environment block for new process
-                    std::string* pOptStdOut, ///< (Optional Out) Standard Output text
-                    std::string* pOptStdErr, ///< (Optional Out) Standard Error text
-                    const std::string* pOptStdIn) ///< (Optional In) Standard Input text
-{
-    int rvalue = -1;
-
-#if defined(_WIN32)
-    struct WinPipe
-    {
-        HANDLE hRead;
-        HANDLE hWrite;
-    };
-    // [0] = child stdin, [1] = child stdout, [2] = child stderr
-    std::array<WinPipe, 3> hPipes = {};
-
-    SECURITY_ATTRIBUTES saAttr  = {sizeof(SECURITY_ATTRIBUTES)};
-    saAttr.bInheritHandle       = TRUE; // Pipe handles are inherited by child process.
-    saAttr.lpSecurityDescriptor = NULL;
-
-    {
-        bool bFail = false;
-        for (WinPipe& p : hPipes)
-        {
-            if (!CreatePipe(&p.hRead, &p.hWrite, &saAttr, 0))
-            {
-                bFail = true;
-            }
-        }
-
-        if (bFail)
-        {
-            for (WinPipe& p : hPipes)
-            {
-                CloseHandle(p.hRead);
-                CloseHandle(p.hWrite);
-            }
-            return rvalue;
-        }
-    }
-
-    STARTUPINFOA StartupInfo{};
-    StartupInfo.cb      = sizeof(STARTUPINFOA);
-    StartupInfo.dwFlags = STARTF_USESTDHANDLES;
-    StartupInfo.dwFlags |= STARTF_USESHOWWINDOW;
-    StartupInfo.wShowWindow = SW_HIDE;
-    if (pOptStdIn)
-    {
-        StartupInfo.hStdInput = hPipes[0].hRead;
-    }
-    StartupInfo.hStdOutput = hPipes[1].hWrite;
-    StartupInfo.hStdError  = hPipes[2].hWrite;
-    PROCESS_INFORMATION procInfo{};
-
-    // CreateProcess can modify the string, so it gets a writable copy. The
-    // buffer is taken with &local_cmd[0]; writing through a cast-away-const
-    // c_str() pointer would be undefined behavior.
-    std::string local_cmd = cmd;
-
-    BOOL ProcessValue = CreateProcessA(NULL,
-                                       &local_cmd[0],
-                                       NULL,
-                                       NULL,
-                                       TRUE,
-                                       0,
-                                       (LPVOID)pOptEnvStrings,
-                                       NULL,
-                                       &StartupInfo,
-                                       &procInfo);
-
-    if (ProcessValue && procInfo.hProcess)
-    {
-        // Drains whatever is currently buffered in hPipe into *pOutStr (if
-        // given) without blocking when the pipe is empty.
-        auto ReadFromPipe = [](HANDLE hPipe, std::string* pOutStr) {
-            char  buf[1024];
-            DWORD dwRead  = 0;
-            DWORD dwAvail = 0;
-            while (true)
-            {
-                if (!::PeekNamedPipe(hPipe, NULL, 0, NULL, &dwAvail, NULL))
-                {
-                    break;
-                }
-
-                if (!dwAvail) // no data available, return
-                {
-                    break;
-                }
-
-                const DWORD dwToRead =
-                    static_cast<DWORD>(std::min<size_t>(sizeof(buf), size_t(dwAvail)));
-                if (!::ReadFile(hPipe, buf, dwToRead, &dwRead, NULL) || !dwRead)
-                {
-                    // error, the child process might have ended
-                    break;
-                }
-
-                if (pOutStr)
-                {
-                    // Append by length so output containing NUL bytes is not
-                    // cut short at the first NUL.
-                    pOutStr->append(buf, dwRead);
-                }
-            }
-        };
-        bool   bProcessEnded = false;
-        size_t bytesWritten  = 0;
-        do
-        {
-            if (pOptStdIn && (pOptStdIn->size() > bytesWritten))
-            {
-                const DWORD bytesToWrite = static_cast<DWORD>(pOptStdIn->size() - bytesWritten);
-                DWORD       bytesDone    = 0;
-                if (!::WriteFile(hPipes[0].hWrite,
-                                 pOptStdIn->data() + bytesWritten,
-                                 bytesToWrite,
-                                 &bytesDone,
-                                 nullptr))
-                {
-                    // Failed to write to pipe
-                    break;
-                }
-                bytesWritten += bytesDone;
-            }
-            // NOTE(review): the stdin write end stays open until the cleanup
-            // below, so a child that reads stdin until EOF may never finish -
-            // confirm whether any caller depends on this.
-
-            // Give some timeslice (50ms), so we won't waste 100% cpu.
-            bProcessEnded = (WaitForSingleObject(procInfo.hProcess, 50) == WAIT_OBJECT_0);
-
-            ReadFromPipe(hPipes[1].hRead, pOptStdOut);
-            ReadFromPipe(hPipes[2].hRead, pOptStdErr);
-        } while (!bProcessEnded);
-
-        DWORD exitVal = 0;
-        if (!GetExitCodeProcess(procInfo.hProcess, &exitVal))
-        {
-            exitVal = 1;
-        }
-
-        CloseHandle(procInfo.hProcess);
-        CloseHandle(procInfo.hThread);
-
-        rvalue = exitVal;
-    }
-
-    for (WinPipe& p : hPipes)
-    {
-        CloseHandle(p.hRead);
-        CloseHandle(p.hWrite);
-    }
-
-#else
-
-    // Non-Windows implementation
-
-#endif
-
-    return rvalue;
-}
diff --git a/src/gallium/drivers/swr/rasterizer/common/os.h b/src/gallium/drivers/swr/rasterizer/common/os.h
deleted file mode 100644
index ed42e1eb79e..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/os.h
+++ /dev/null
@@ -1,365 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-
-#ifndef __SWR_OS_H__
-#define __SWR_OS_H__
-
-#include <cstddef>
-#include "core/knobs.h"
-
-#if (defined(FORCE_WINDOWS) || defined(_WIN32)) && !defined(FORCE_LINUX)
-
-#define SWR_API __cdecl
-#define SWR_VISIBLE __declspec(dllexport)
-
-#ifndef NOMINMAX
-#undef UNICODE
-#define NOMINMAX
-#include <windows.h>
-#undef NOMINMAX
-#define UNICODE
-#else
-#undef UNICODE
-#include <windows.h>
-#define UNICODE
-#endif
-#include <intrin.h>
-#include <cstdint>
-
-#if defined(MemoryFence)
-// Windows.h defines MemoryFence as _mm_mfence, but this conflicts with llvm::sys::MemoryFence
-#undef MemoryFence
-#endif
-
-#if defined(_MSC_VER)
-#define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD
-#elif defined(__GNUC__)
-#define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))
-#endif
-
-#if defined(_DEBUG)
-// We compile Debug builds with inline function expansion enabled. This allows
-// functions compiled with __forceinline to be inlined even in Debug builds.
-// The inline_depth(0) pragma below will disable inline function expansion for
-// normal INLINE / inline functions, but not for __forceinline functions.
-// Our SIMD function wrappers (see simdlib.hpp) use __forceinline even in
-// Debug builds.
-#define INLINE inline
-#pragma inline_depth(0)
-#else
-// Use of __forceinline increases compile time dramatically in release builds
-// and provides almost 0 measurable benefit. Disable until we have a compelling
-// use-case
-// #define INLINE __forceinline
-#define INLINE inline
-#endif
-#ifndef FORCEINLINE
-#define FORCEINLINE __forceinline
-#endif
-
-#define DEBUGBREAK __debugbreak()
-
-#define PRAGMA_WARNING_PUSH_DISABLE(...) \
- __pragma(warning(push)); \
- __pragma(warning(disable : __VA_ARGS__));
-
-#define PRAGMA_WARNING_POP() __pragma(warning(pop))
-
-/// Allocates _Size bytes aligned to _Alignment via _aligned_malloc();
-/// release the result with AlignedFree().
-static inline void* AlignedMalloc(size_t _Size, size_t _Alignment)
-{
-    void* const pBlock = _aligned_malloc(_Size, _Alignment);
-    return pBlock;
-}
-
-/// Releases a block obtained from AlignedMalloc() via _aligned_free().
-static inline void AlignedFree(void* p)
-{
-    _aligned_free(p);
-}
-
-#if defined(_WIN64)
-#define BitScanReverseSizeT BitScanReverse64
-#define BitScanForwardSizeT BitScanForward64
-#define _mm_popcount_sizeT _mm_popcnt_u64
-#else
-#define BitScanReverseSizeT BitScanReverse
-#define BitScanForwardSizeT BitScanForward
-#define _mm_popcount_sizeT _mm_popcnt_u32
-#endif
-
-#if !defined(_WIN64)
-extern "C" {
-/// Replacement for the _BitScanForward64 intrinsic on 32-bit Windows builds.
-/// Stores the position of the lowest set bit of Mask in *Index.
-/// @returns 0 if Mask is zero (*Index is left untouched), 1 otherwise
-inline unsigned char _BitScanForward64(unsigned long* Index, uint64_t Mask)
-{
-    if (Mask == 0)
-        return 0;
-#ifdef __GNUC__
-    *Index = __builtin_ctzll(Mask);
-#else
-    *Index = 0;
-    for (int i = 0; i < 64; ++i)
-    {
-        if ((1ULL << i) & Mask)
-        {
-            // Stop at the first hit: continuing the scan would overwrite
-            // *Index with the highest set bit instead of the lowest.
-            *Index = i;
-            break;
-        }
-    }
-#endif
-    return 1;
-}
-
-/// Replacement for the _BitScanReverse64 intrinsic on 32-bit Windows builds.
-/// Stores the position of the highest set bit of Mask in *Index.
-/// @returns 0 if Mask is zero (*Index is left untouched), 1 otherwise
-inline unsigned char _BitScanReverse64(unsigned long* Index, uint64_t Mask)
-{
-    if (Mask == 0)
-        return 0;
-#ifdef __GNUC__
-    *Index = 63 - __builtin_clzll(Mask);
-#else
-    *Index = 0;
-    for (int i = 63; i >= 0; --i)
-    {
-        if ((1ULL << i) & Mask)
-        {
-            // Stop at the first hit: continuing the scan would overwrite
-            // *Index with the lowest set bit instead of the highest.
-            *Index = i;
-            break;
-        }
-    }
-#endif
-    return 1;
-}
-}
-#endif
-
-#elif defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
-
-#define SWR_API
-#define SWR_VISIBLE __attribute__((visibility("default")))
-
-#include <stdlib.h>
-#include <string.h>
-#include <x86intrin.h>
-#include <stdint.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <stdio.h>
-#include <limits.h>
-
-typedef void VOID;
-typedef void* LPVOID;
-typedef int INT;
-typedef unsigned int UINT;
-typedef void* HANDLE;
-typedef int LONG;
-typedef unsigned int DWORD;
-
-#undef FALSE
-#define FALSE 0
-
-#undef TRUE
-#define TRUE 1
-
-#define MAX_PATH PATH_MAX
-
-#define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))
-#ifndef INLINE
-#define INLINE __inline
-#endif
-#ifndef FORCEINLINE
-#define FORCEINLINE INLINE
-#endif
-#define DEBUGBREAK asm("int $3")
-
-#if !defined(__CYGWIN__)
-
-#ifndef __cdecl
-#define __cdecl
-#endif
-#ifndef __stdcall
-#define __stdcall
-#endif
-
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
-#define __declspec(x) __declspec_##x
-#define __declspec_align(y) __attribute__((aligned(y)))
-#define __declspec_deprecated __attribute__((deprecated))
-#define __declspec_dllexport
-#define __declspec_dllimport
-#define __declspec_noinline __attribute__((__noinline__))
-#define __declspec_nothrow __attribute__((nothrow))
-#define __declspec_novtable
-#define __declspec_thread __thread
-#else
-#define __declspec(X)
-#endif
-
-#endif
-
-#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
-
-#if !defined(__clang__) && (__GNUC__) && (GCC_VERSION < 40500)
-/// Reads the time stamp counter with the rdtsc instruction (fallback for
-/// old gcc versions that lack the intrinsic).
-/// @returns 64-bit counter value assembled from EDX:EAX
-inline uint64_t __rdtsc()
-{
-    // Unsigned 32-bit halves: with a signed 32-bit 'long' a low half with bit 31
-    // set would be sign-extended and corrupt the upper half when combined.
-    uint32_t low, high;
-    asm volatile("rdtsc" : "=a"(low), "=d"(high));
-    return ((uint64_t)low | ((uint64_t)high << 32));
-}
-#endif
-
-#if !defined(__clang__) && !defined(__INTEL_COMPILER)
-// Intrinsic not defined in gcc < 10
-#if (__GNUC__) && (GCC_VERSION < 100000)
-/// Stores the lower 128 bits of a to *lo and the upper 128 bits to *hi,
-/// both with unaligned stores.
-static INLINE void _mm256_storeu2_m128i(__m128i* hi, __m128i* lo, __m256i a)
-{
-    const __m128i lowerHalf = _mm256_castsi256_si128(a);
-    _mm_storeu_si128((__m128i*)lo, lowerHalf);
-
-    const __m128i upperHalf = _mm256_extractf128_si256(a, 0x1);
-    _mm_storeu_si128((__m128i*)hi, upperHalf);
-}
-#endif
-
-// gcc prior to 4.9 doesn't have _mm*_undefined_*
-#if (__GNUC__) && (GCC_VERSION < 40900)
-#define _mm_undefined_si128 _mm_setzero_si128
-#define _mm256_undefined_ps _mm256_setzero_ps
-#endif
-#endif
-
-/// Stores the position of the lowest set bit of the 64-bit Mask in *Index.
-/// @returns 1 if a bit was found, 0 if Mask is zero (*Index is left untouched)
-inline unsigned char _BitScanForward64(unsigned long* Index, uint64_t Mask)
-{
-    if (Mask != 0)
-    {
-        *Index = __builtin_ctzll(Mask);
-        return 1;
-    }
-    return 0;
-}
-
-/// Stores the position of the lowest set bit of the 32-bit Mask in *Index.
-/// @returns 1 if a bit was found, 0 if Mask is zero (*Index is left untouched)
-inline unsigned char _BitScanForward(unsigned long* Index, uint32_t Mask)
-{
-    if (Mask != 0)
-    {
-        *Index = __builtin_ctz(Mask);
-        return 1;
-    }
-    return 0;
-}
-
-/// Stores the position of the highest set bit of the 64-bit Mask in *Index.
-/// @returns 1 if a bit was found, 0 if Mask is zero (*Index is left untouched)
-inline unsigned char _BitScanReverse64(unsigned long* Index, uint64_t Mask)
-{
-    if (Mask != 0)
-    {
-        *Index = 63 - __builtin_clzll(Mask);
-        return 1;
-    }
-    return 0;
-}
-
-/// Stores the position of the highest set bit of the 32-bit Mask in *Index.
-/// @returns 1 if a bit was found, 0 if Mask is zero (*Index is left untouched)
-inline unsigned char _BitScanReverse(unsigned long* Index, uint32_t Mask)
-{
-    if (Mask != 0)
-    {
-        *Index = 31 - __builtin_clz(Mask);
-        return 1;
-    }
-    return 0;
-}
-
-/// Allocates size bytes aligned to alignment via posix_memalign().
-/// @returns the block, or NULL if posix_memalign() reports a failure
-inline void* AlignedMalloc(size_t size, size_t alignment)
-{
-    void*     pBlock;
-    const int status = posix_memalign(&pBlock, alignment, size);
-    // posix_memalign() returns 0 on success and an error number otherwise
-    return (status == 0) ? pBlock : NULL;
-}
-
-/// Releases a block obtained from AlignedMalloc(); forwards to free().
-static inline void AlignedFree(void* p)
-{
-    free(p);
-}
-
-#define _countof(a) (sizeof(a) / sizeof(*(a)))
-
-// Stand-ins for Windows-only names used by shared code.
-#define sprintf_s sprintf
-// snprintf() always NUL-terminates (for size > 0); the previous strncpy()
-// mapping left dst unterminated whenever src did not fit into size bytes.
-#define strcpy_s(dst, size, src) snprintf((dst), (size), "%s", (src))
-#define GetCurrentProcessId getpid
-
-#define InterlockedCompareExchange(Dest, Exchange, Comparand) \
- __sync_val_compare_and_swap(Dest, Comparand, Exchange)
-#define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, Value)
-#define InterlockedDecrement(Append) __sync_sub_and_fetch(Append, 1)
-#define InterlockedDecrement64(Append) __sync_sub_and_fetch(Append, 1)
-#define InterlockedIncrement(Append) __sync_add_and_fetch(Append, 1)
-#define InterlockedAdd(Addend, Value) __sync_add_and_fetch(Addend, Value)
-#define InterlockedAdd64(Addend, Value) __sync_add_and_fetch(Addend, Value)
-#define _ReadWriteBarrier() asm volatile("" ::: "memory")
-
-#define PRAGMA_WARNING_PUSH_DISABLE(...)
-#define PRAGMA_WARNING_POP()
-
-#define ZeroMemory(dst, size) memset(dst, 0, size)
-#else
-
-#error Unsupported OS/system.
-
-#endif
-
-#define THREAD thread_local
-
-// Universal types
-typedef uint8_t KILOBYTE[1024];
-typedef KILOBYTE MEGABYTE[1024];
-typedef MEGABYTE GIGABYTE[1024];
-
-#define OSALIGNLINE(RWORD) OSALIGN(RWORD, 64)
-#define OSALIGNSIMD(RWORD) OSALIGN(RWORD, KNOB_SIMD_BYTES)
-#define OSALIGNSIMD16(RWORD) OSALIGN(RWORD, KNOB_SIMD16_BYTES)
-
-#include "common/swr_assert.h"
-
-#ifdef __GNUC__
-#define ATTR_UNUSED __attribute__((unused))
-#else
-#define ATTR_UNUSED
-#endif
-
-#define SWR_FUNC(_retType, _funcName, /* args */...) \
- typedef _retType(SWR_API* PFN##_funcName)(__VA_ARGS__); \
- _retType SWR_API _funcName(__VA_ARGS__);
-
-// Defined in os.cpp
-void SWR_API SetCurrentThreadName(const char* pThreadName);
-void SWR_API CreateDirectoryPath(const std::string& path);
-
-/// Execute Command (block until finished)
-/// @returns process exit value
-int SWR_API
- ExecCmd(const std::string& cmd, ///< (In) Command line string
- const char* pOptEnvStrings = nullptr, ///< (Optional In) Environment block for new process
- std::string* pOptStdOut = nullptr, ///< (Optional Out) Standard Output text
- std::string* pOptStdErr = nullptr, ///< (Optional Out) Standard Error text
- const std::string* pOptStdIn = nullptr); ///< (Optional In) Standard Input text
-
-
-/// Switches the SSE control/status register to round-to-nearest with
-/// flush-to-zero and denormals-are-zero enabled.
-/// @returns the CSR value that was active before the call
-static INLINE uint32_t SetOptimalVectorCSR()
-{
-    const uint32_t previousCSR = _mm_getcsr();
-
-    // fields that get replaced, and the values they are replaced with
-    const uint32_t clearedFields =
-        (_MM_ROUND_MASK | _MM_DENORMALS_ZERO_MASK | _MM_FLUSH_ZERO_MASK);
-    const uint32_t enabledBits = (_MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);
-
-    _mm_setcsr((previousCSR & ~clearedFields) | enabledBits);
-
-    return previousCSR;
-}
-
-/// Writes csrState to the SSE control/status register.
-/// @param csrState - should be the value returned from SetOptimalVectorCSR()
-static INLINE void RestoreVectorCSR(uint32_t csrState)
-{
-    const uint32_t restoredCSR = csrState;
-    _mm_setcsr(restoredCSR);
-}
-
-#endif //__SWR_OS_H__
diff --git a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.cpp b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.cpp
deleted file mode 100644
index e2076e8fc44..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.cpp
+++ /dev/null
@@ -1,192 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file rdtsc_buckets.cpp
- *
- * @brief implementation of rdtsc buckets.
- *
- * Notes:
- *
- ******************************************************************************/
-#include "rdtsc_buckets.h"
-#include <inttypes.h>
-
-#if defined(_WIN32)
-#define PATH_SEPARATOR "\\"
-#elif defined(__unix__) || defined(__APPLE__)
-#define PATH_SEPARATOR "/"
-#else
-#error "Unsupported platform"
-#endif
-
-THREAD UINT tlsThreadId = 0;
-
-BucketManager::~BucketManager()
-{
-}
-
-void BucketManager::RegisterThread(const std::string& name)
-{
-
- BUCKET_THREAD newThread;
- newThread.name = name;
- newThread.root.children.reserve(mBuckets.size());
- newThread.root.id = 0;
- newThread.root.pParent = nullptr;
- newThread.pCurrent = &newThread.root;
-
- mThreadMutex.lock();
-
- // assign unique thread id for this thread
- size_t id = mThreads.size();
- newThread.id = (UINT)id;
- tlsThreadId = (UINT)id;
-
- // store new thread
- mThreads.push_back(newThread);
-
- mThreadMutex.unlock();
-}
-
-UINT BucketManager::RegisterBucket(const BUCKET_DESC& desc)
-{
- mThreadMutex.lock();
- size_t id = mBuckets.size();
- mBuckets.push_back(desc);
- mThreadMutex.unlock();
- return (UINT)id;
-}
-
-void BucketManager::PrintBucket(
- FILE* f, UINT level, uint64_t threadCycles, uint64_t parentCycles, const BUCKET& bucket)
-{
- const char* arrows[] = {
- "",
- "|-> ",
- " |-> ",
- " |-> ",
- " |-> ",
- " |-> ",
- " |-> ",
- " |-> ",
- " |-> ",
- };
-
- // compute percent of total cycles used by this bucket
- float percentTotal = (float)((double)bucket.elapsed / (double)threadCycles * 100.0);
-
- // compute percent of parent cycles used by this bucket
- float percentParent = (float)((double)bucket.elapsed / (double)parentCycles * 100.0);
-
- // compute average cycle count per invocation
- uint64_t CPE = bucket.elapsed / bucket.count;
-
- BUCKET_DESC& desc = mBuckets[bucket.id];
-
- // construct hierarchy visualization
- std::string str = arrows[level];
- str += desc.name;
- char hier[80];
- strcpy_s(hier, sizeof(hier)-1, str.c_str());
-
- // print out
- fprintf(f,
- "%6.2f %6.2f %-10" PRIu64 " %-10" PRIu64 " %-10u %-10lu %-10u %s\n",
- percentTotal,
- percentParent,
- bucket.elapsed,
- CPE,
- bucket.count,
- (unsigned long)0,
- (uint32_t)0,
- hier);
-
- // dump all children of this bucket
- for (const BUCKET& child : bucket.children)
- {
- if (child.count)
- {
- PrintBucket(f, level + 1, threadCycles, bucket.elapsed, child);
- }
- }
-}
-
-void BucketManager::PrintThread(FILE* f, const BUCKET_THREAD& thread)
-{
- // print header
- fprintf(f, "\nThread %u (%s)\n", thread.id, thread.name.c_str());
- fprintf(f, " %%Tot %%Par Cycles CPE NumEvent CPE2 NumEvent2 Bucket\n");
-
- // compute thread level total cycle counts across all buckets from root
- const BUCKET& root = thread.root;
- uint64_t totalCycles = 0;
- for (const BUCKET& child : root.children)
- {
- totalCycles += child.elapsed;
- }
-
- for (const BUCKET& child : root.children)
- {
- if (child.count)
- {
- PrintBucket(f, 0, totalCycles, totalCycles, child);
- }
- }
-}
-
-void BucketManager::PrintReport(const std::string& filename)
-{
- {
- FILE* f = fopen(filename.c_str(), "w");
- assert(f);
-
- mThreadMutex.lock();
- for (const BUCKET_THREAD& thread : mThreads)
- {
- PrintThread(f, thread);
- fprintf(f, "\n");
- }
-
- mThreadMutex.unlock();
-
- fclose(f);
- }
-}
-
-
-void BucketManager::StartCapture()
-{
-
- printf("Capture Starting\n");
-
- mCapturing = true;
-}
-
-void BucketManager_StartBucket(BucketManager* pBucketMgr, uint32_t id)
-{
- pBucketMgr->StartBucket(id);
-}
-
-void BucketManager_StopBucket(BucketManager* pBucketMgr, uint32_t id)
-{
- pBucketMgr->StopBucket(id);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h
deleted file mode 100644
index b00cbf63eba..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file rdtsc_buckets.h
- *
- * @brief declaration for rdtsc buckets.
- *
- * Notes:
- *
- ******************************************************************************/
-#pragma once
-
-#include "os.h"
-#include <vector>
-#include <mutex>
-#include <sstream>
-
-#include "rdtsc_buckets_shared.h"
-
-
-// unique thread id stored in thread local storage
-extern THREAD UINT tlsThreadId;
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief BucketManager encapsulates a single instance of the buckets
-/// functionality. There can be one or many bucket managers active
-/// at any time. The manager owns all the threads and
-/// bucket information that have been registered to it.
-class BucketManager
-{
-public:
-
- uint32_t mCurrentFrame;
- std::vector<uint32_t> mBucketMap;
- bool mBucketsInitialized;
- std::string mBucketMgrName;
-
-
- BucketManager(std::string name) : mCurrentFrame(0), mBucketsInitialized(false), mBucketMgrName(name)
- {
- mBucketMap.clear();
- }
- ~BucketManager();
-
- // removes all registered thread data
- void ClearThreads()
- {
- mThreadMutex.lock();
- mThreads.clear();
- mThreadMutex.unlock();
- }
-
- // removes all registered buckets
- void ClearBuckets()
- {
- mThreadMutex.lock();
- mBuckets.clear();
- mThreadMutex.unlock();
- }
-
- /// Registers a new thread with the manager.
- /// @param name - name of thread, used for labels in reports and threadviz
- void RegisterThread(const std::string& name);
-
- /// Registers a new bucket type with the manager. Returns a unique
- /// id which should be used in subsequent calls to start/stop the bucket
- /// @param desc - description of the bucket
- /// @return unique id
- UINT RegisterBucket(const BUCKET_DESC& desc);
-
- // print report
- void PrintReport(const std::string& filename);
-
-
- // start capturing
- void StartCapture();
-
- // stop capturing
- INLINE void StopCapture()
- {
- mCapturing = false;
-
- // wait for all threads to pop back to root bucket
- bool stillCapturing = true;
- while (stillCapturing)
- {
- stillCapturing = false;
- for (const BUCKET_THREAD& t : mThreads)
- {
- if (t.level > 0)
- {
- stillCapturing = true;
- continue;
- }
- }
- }
-
- mDoneCapturing = true;
- printf("Capture Stopped\n");
- }
-
- // start a bucket
- // @param id generated by RegisterBucket
- INLINE void StartBucket(UINT id)
- {
- if (!mCapturing)
- return;
-
- SWR_ASSERT(tlsThreadId < mThreads.size());
-
- BUCKET_THREAD& bt = mThreads[tlsThreadId];
-
- uint64_t tsc = __rdtsc();
-
- {
- if (bt.pCurrent->children.size() < mBuckets.size())
- {
- bt.pCurrent->children.resize(mBuckets.size());
- }
- BUCKET& child = bt.pCurrent->children[id];
- child.pParent = bt.pCurrent;
- child.id = id;
- child.start = tsc;
-
- // update thread's currently executing bucket
- bt.pCurrent = &child;
- }
-
-
- bt.level++;
- }
-
- // stop the currently executing bucket
- INLINE void StopBucket(UINT id)
- {
- SWR_ASSERT(tlsThreadId < mThreads.size());
- BUCKET_THREAD& bt = mThreads[tlsThreadId];
-
- if (bt.level == 0)
- {
- return;
- }
-
- uint64_t tsc = __rdtsc();
-
- {
- if (bt.pCurrent->start == 0)
- return;
- SWR_ASSERT(bt.pCurrent->id == id, "Mismatched buckets detected");
-
- bt.pCurrent->elapsed += (tsc - bt.pCurrent->start);
- bt.pCurrent->count++;
-
- // pop to parent
- bt.pCurrent = bt.pCurrent->pParent;
- }
-
- bt.level--;
- }
-
- INLINE void AddEvent(uint32_t id, uint32_t count)
- {
- if (!mCapturing)
- return;
-
- SWR_ASSERT(tlsThreadId < mThreads.size());
-
- BUCKET_THREAD& bt = mThreads[tlsThreadId];
-
- // don't record events for threadviz
- {
- if (bt.pCurrent->children.size() < mBuckets.size())
- {
- bt.pCurrent->children.resize(mBuckets.size());
- }
- BUCKET& child = bt.pCurrent->children[id];
- child.pParent = bt.pCurrent;
- child.id = id;
- child.count += count;
- }
- }
-
-private:
- void PrintBucket(
- FILE* f, UINT level, uint64_t threadCycles, uint64_t parentCycles, const BUCKET& bucket);
- void PrintThread(FILE* f, const BUCKET_THREAD& thread);
-
- // list of active threads that have registered with this manager
- std::vector<BUCKET_THREAD> mThreads;
-
- // list of buckets registered with this manager
- std::vector<BUCKET_DESC> mBuckets;
-
- // is capturing currently enabled
- volatile bool mCapturing{false};
-
- // has capturing completed
- volatile bool mDoneCapturing{false};
-
- std::mutex mThreadMutex;
-
- std::string mThreadVizDir;
-
-};
-
-// C helpers for jitter
-void BucketManager_StartBucket(BucketManager* pBucketMgr, uint32_t id);
-void BucketManager_StopBucket(BucketManager* pBucketMgr, uint32_t id);
diff --git a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets_shared.h b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets_shared.h
deleted file mode 100644
index fd3b1df746a..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets_shared.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file rdtsc_buckets.h
- *
- * @brief declaration for rdtsc buckets.
- *
- * Notes:
- *
- ******************************************************************************/
-#pragma once
-
-#include <vector>
-#include <cassert>
-
-struct BUCKET
-{
- uint32_t id{0};
- uint64_t start{0};
- uint64_t elapsed{0};
- uint32_t count{0};
-
- BUCKET* pParent{nullptr};
- std::vector<BUCKET> children;
-};
-
-struct BUCKET_DESC
-{
- // name of bucket, used in reports
- std::string name;
-
- // description of bucket, used in threadviz
- std::string description;
-
- // enable for threadviz dumping
- bool enableThreadViz;
-
- // threadviz color of bucket, in RGBA8_UNORM format
- uint32_t color;
-};
-
-
-struct BUCKET_THREAD
-{
- // name of thread, used in reports
- std::string name;
-
- // id for this thread, assigned by the thread manager
- uint32_t id{0};
-
- // root of the bucket hierarchy for this thread
- BUCKET root;
-
- // currently executing bucket somewhere in the hierarchy
- BUCKET* pCurrent{nullptr};
-
- // currently executing hierarchy level
- uint32_t level{0};
-
- // threadviz file object
- FILE* vizFile{nullptr};
-
-
- BUCKET_THREAD() {}
- BUCKET_THREAD(const BUCKET_THREAD& that)
- {
- name = that.name;
- id = that.id;
- root = that.root;
- pCurrent = &root;
- vizFile = that.vizFile;
- }
-};
-
-enum VIZ_TYPE
-{
- VIZ_START = 0,
- VIZ_STOP = 1,
- VIZ_DATA = 2
-};
-
-struct VIZ_START_DATA
-{
- uint8_t type;
- uint32_t bucketId;
- uint64_t timestamp;
-};
-
-struct VIZ_STOP_DATA
-{
- uint8_t type;
- uint64_t timestamp;
-};
-
-inline void Serialize(FILE* f, const VIZ_START_DATA& data)
-{
- fwrite(&data, sizeof(VIZ_START_DATA), 1, f);
-}
-
-inline void Deserialize(FILE* f, VIZ_START_DATA& data)
-{
- fread(&data, sizeof(VIZ_START_DATA), 1, f);
- assert(data.type == VIZ_START);
-}
-
-inline void Serialize(FILE* f, const VIZ_STOP_DATA& data)
-{
- fwrite(&data, sizeof(VIZ_STOP_DATA), 1, f);
-}
-
-inline void Deserialize(FILE* f, VIZ_STOP_DATA& data)
-{
- fread(&data, sizeof(VIZ_STOP_DATA), 1, f);
- assert(data.type == VIZ_STOP);
-}
-
-inline void Serialize(FILE* f, const std::string& string)
-{
- assert(string.size() <= 256);
-
- uint8_t length = (uint8_t)string.size();
- fwrite(&length, sizeof(length), 1, f);
- fwrite(string.c_str(), string.size(), 1, f);
-}
-
-inline void Deserialize(FILE* f, std::string& string)
-{
- char cstr[256];
- uint8_t length;
- fread(&length, sizeof(length), 1, f);
- fread(cstr, length, 1, f);
- cstr[length] = 0;
- string.assign(cstr);
-}
-
-inline void Serialize(FILE* f, const BUCKET_DESC& desc)
-{
- Serialize(f, desc.name);
- Serialize(f, desc.description);
- fwrite(&desc.enableThreadViz, sizeof(desc.enableThreadViz), 1, f);
- fwrite(&desc.color, sizeof(desc.color), 1, f);
-}
-
-inline void Deserialize(FILE* f, BUCKET_DESC& desc)
-{
- Deserialize(f, desc.name);
- Deserialize(f, desc.description);
- fread(&desc.enableThreadViz, sizeof(desc.enableThreadViz), 1, f);
- fread(&desc.color, sizeof(desc.color), 1, f);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
deleted file mode 100644
index 5964edff4d3..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-
-#ifndef __SWR_SIMD16INTRIN_H__
-#define __SWR_SIMD16INTRIN_H__
-
-#if KNOB_SIMD16_WIDTH == 16
-typedef SIMD512 SIMD16;
-#else
-#error Unsupported vector width
-#endif // KNOB_SIMD16_WIDTH == 16
-
-#define _simd16_setzero_ps SIMD16::setzero_ps
-#define _simd16_setzero_si SIMD16::setzero_si
-#define _simd16_set1_ps SIMD16::set1_ps
-#define _simd16_set1_epi8 SIMD16::set1_epi8
-#define _simd16_set1_epi32 SIMD16::set1_epi32
-#define _simd16_set_ps SIMD16::set_ps
-#define _simd16_set_epi32 SIMD16::set_epi32
-#define _simd16_load_ps SIMD16::load_ps
-#define _simd16_loadu_ps SIMD16::loadu_ps
-#if 1
-#define _simd16_load1_ps SIMD16::broadcast_ss
-#endif
-#define _simd16_load_si SIMD16::load_si
-#define _simd16_loadu_si SIMD16::loadu_si
-#define _simd16_broadcast_ss(m) SIMD16::broadcast_ss((float const*)m)
-#define _simd16_store_ps SIMD16::store_ps
-#define _simd16_store_si SIMD16::store_si
-#define _simd16_extract_ps(a, imm8) SIMD16::extract_ps<imm8>(a)
-#define _simd16_extract_si(a, imm8) SIMD16::extract_si<imm8>(a)
-#define _simd16_insert_ps(a, b, imm8) SIMD16::insert_ps<imm8>(a, b)
-#define _simd16_insert_si(a, b, imm8) SIMD16::insert_si<imm8>(a, b)
-#define _simd16_maskstore_ps SIMD16::maskstore_ps
-#define _simd16_blend_ps(a, b, mask) SIMD16::blend_ps<mask>(a, b)
-#define _simd16_blendv_ps SIMD16::blendv_ps
-#define _simd16_blendv_epi32 SIMD16::blendv_epi32
-#define _simd16_mul_ps SIMD16::mul_ps
-#define _simd16_div_ps SIMD16::div_ps
-#define _simd16_add_ps SIMD16::add_ps
-#define _simd16_sub_ps SIMD16::sub_ps
-#define _simd16_rsqrt_ps SIMD16::rsqrt_ps
-#define _simd16_min_ps SIMD16::min_ps
-#define _simd16_max_ps SIMD16::max_ps
-#define _simd16_movemask_ps SIMD16::movemask_ps
-#define _simd16_movemask_pd SIMD16::movemask_pd
-#define _simd16_cvtps_epi32 SIMD16::cvtps_epi32
-#define _simd16_cvttps_epi32 SIMD16::cvttps_epi32
-#define _simd16_cvtepi32_ps SIMD16::cvtepi32_ps
-#define _simd16_cmp_ps(a, b, comp) SIMD16::cmp_ps<SIMD16::CompareType(comp)>(a, b)
-#define _simd16_cmplt_ps SIMD16::cmplt_ps
-#define _simd16_cmpgt_ps SIMD16::cmpgt_ps
-#define _simd16_cmpneq_ps SIMD16::cmpneq_ps
-#define _simd16_cmpeq_ps SIMD16::cmpeq_ps
-#define _simd16_cmpge_ps SIMD16::cmpge_ps
-#define _simd16_cmple_ps SIMD16::cmple_ps
-#define _simd16_castsi_ps SIMD16::castsi_ps
-#define _simd16_castps_si SIMD16::castps_si
-#define _simd16_castsi_pd SIMD16::castsi_pd
-#define _simd16_castpd_si SIMD16::castpd_si
-#define _simd16_castpd_ps SIMD16::castpd_ps
-#define _simd16_castps_pd SIMD16::castps_pd
-#define _simd16_and_ps SIMD16::and_ps
-#define _simd16_andnot_ps SIMD16::andnot_ps
-#define _simd16_or_ps SIMD16::or_ps
-#define _simd16_xor_ps SIMD16::xor_ps
-#define _simd16_round_ps(a, mode) SIMD16::round_ps<SIMD16::RoundMode(mode)>(a)
-#define _simd16_mul_epi32 SIMD16::mul_epi32
-#define _simd16_mullo_epi32 SIMD16::mullo_epi32
-#define _simd16_sub_epi32 SIMD16::sub_epi32
-#define _simd16_sub_epi64 SIMD16::sub_epi64
-#define _simd16_min_epi32 SIMD16::min_epi32
-#define _simd16_max_epi32 SIMD16::max_epi32
-#define _simd16_min_epu32 SIMD16::min_epu32
-#define _simd16_max_epu32 SIMD16::max_epu32
-#define _simd16_add_epi32 SIMD16::add_epi32
-#define _simd16_and_si SIMD16::and_si
-#define _simd16_andnot_si SIMD16::andnot_si
-#define _simd16_or_si SIMD16::or_si
-#define _simd16_xor_si SIMD16::xor_si
-#define _simd16_cmpeq_epi32 SIMD16::cmpeq_epi32
-#define _simd16_cmpgt_epi32 SIMD16::cmpgt_epi32
-#define _simd16_cmplt_epi32 SIMD16::cmplt_epi32
-#define _simd16_testz_ps SIMD16::testz_ps
-#define _simd16_unpacklo_ps SIMD16::unpacklo_ps
-#define _simd16_unpackhi_ps SIMD16::unpackhi_ps
-#define _simd16_unpacklo_pd SIMD16::unpacklo_pd
-#define _simd16_unpackhi_pd SIMD16::unpackhi_pd
-#define _simd16_unpacklo_epi8 SIMD16::unpacklo_epi8
-#define _simd16_unpackhi_epi8 SIMD16::unpackhi_epi8
-#define _simd16_unpacklo_epi16 SIMD16::unpacklo_epi16
-#define _simd16_unpackhi_epi16 SIMD16::unpackhi_epi16
-#define _simd16_unpacklo_epi32 SIMD16::unpacklo_epi32
-#define _simd16_unpackhi_epi32 SIMD16::unpackhi_epi32
-#define _simd16_unpacklo_epi64 SIMD16::unpacklo_epi64
-#define _simd16_unpackhi_epi64 SIMD16::unpackhi_epi64
-#define _simd16_slli_epi32(a, i) SIMD16::slli_epi32<i>(a)
-#define _simd16_srli_epi32(a, i) SIMD16::srli_epi32<i>(a)
-#define _simd16_srai_epi32(a, i) SIMD16::srai_epi32<i>(a)
-#define _simd16_fmadd_ps SIMD16::fmadd_ps
-#define _simd16_fmsub_ps SIMD16::fmsub_ps
-#define _simd16_adds_epu8 SIMD16::adds_epu8
-#define _simd16_subs_epu8 SIMD16::subs_epu8
-#define _simd16_add_epi8 SIMD16::add_epi8
-#define _simd16_shuffle_epi8 SIMD16::shuffle_epi8
-
-#define _simd16_i32gather_ps(m, index, scale) \
- SIMD16::i32gather_ps<SIMD16::ScaleFactor(scale)>(m, index)
-#define _simd16_mask_i32gather_ps(a, m, index, mask, scale) \
- SIMD16::mask_i32gather_ps<SIMD16::ScaleFactor(scale)>(a, m, index, mask)
-
-#define _simd16_abs_epi32 SIMD16::abs_epi32
-
-#define _simd16_cmpeq_epi64 SIMD16::cmpeq_epi64
-#define _simd16_cmpgt_epi64 SIMD16::cmpgt_epi64
-#define _simd16_cmpeq_epi16 SIMD16::cmpeq_epi16
-#define _simd16_cmpgt_epi16 SIMD16::cmpgt_epi16
-#define _simd16_cmpeq_epi8 SIMD16::cmpeq_epi8
-#define _simd16_cmpgt_epi8 SIMD16::cmpgt_epi8
-
-#define _simd16_permute_ps_i(a, i) SIMD16::permute_ps<i>(a)
-#define _simd16_permute_ps SIMD16::permute_ps
-#define _simd16_permute_epi32 SIMD16::permute_epi32
-#define _simd16_sllv_epi32 SIMD16::sllv_epi32
-#define _simd16_srlv_epi32 SIMD16::sllv_epi32
-#define _simd16_permute2f128_ps(a, b, i) SIMD16::permute2f128_ps<i>(a, b)
-#define _simd16_permute2f128_pd(a, b, i) SIMD16::permute2f128_pd<i>(a, b)
-#define _simd16_permute2f128_si(a, b, i) SIMD16::permute2f128_si<i>(a, b)
-#define _simd16_shuffle_ps(a, b, i) SIMD16::shuffle_ps<i>(a, b)
-#define _simd16_shuffle_pd(a, b, i) SIMD16::shuffle_pd<i>(a, b)
-#define _simd16_shuffle_epi32(a, b, imm8) SIMD16::shuffle_epi32<imm8>(a, b)
-#define _simd16_shuffle_epi64(a, b, imm8) SIMD16::shuffle_epi64<imm8>(a, b)
-#define _simd16_cvtepu8_epi16 SIMD16::cvtepu8_epi16
-#define _simd16_cvtepu8_epi32 SIMD16::cvtepu8_epi32
-#define _simd16_cvtepu16_epi32 SIMD16::cvtepu16_epi32
-#define _simd16_cvtepu16_epi64 SIMD16::cvtepu16_epi64
-#define _simd16_cvtepu32_epi64 SIMD16::cvtepu32_epi64
-#define _simd16_packus_epi16 SIMD16::packus_epi16
-#define _simd16_packs_epi16 SIMD16::packs_epi16
-#define _simd16_packus_epi32 SIMD16::packus_epi32
-#define _simd16_packs_epi32 SIMD16::packs_epi32
-#define _simd16_cmplt_ps_mask SIMD16::cmp_ps_mask<SIMD16::CompareType::LT_OQ>
-#define _simd16_cmpeq_ps_mask SIMD16::cmp_ps_mask<SIMD16::CompareType::EQ_OQ>
-#define _simd16_int2mask(mask) simd16mask(mask)
-#define _simd16_mask2int(mask) int(mask)
-#define _simd16_vmask_ps SIMD16::vmask_ps
-
-#endif //__SWR_SIMD16INTRIN_H_
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
deleted file mode 100644
index ebb4f4b7f11..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
+++ /dev/null
@@ -1,322 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-
-#ifndef __SWR_SIMDINTRIN_H__
-#define __SWR_SIMDINTRIN_H__
-
-#include "common/intrin.h"
-#include "common/simdlib.hpp"
-
-#if KNOB_SIMD_WIDTH == 8
-typedef SIMD256 SIMD;
-#else
-#error Unsupported vector width
-#endif // KNOB_SIMD16_WIDTH == 16
-
-#define _simd128_maskstore_ps SIMD128::maskstore_ps
-#define _simd128_fmadd_ps SIMD128::fmadd_ps
-
-#define _simd_load_ps SIMD::load_ps
-#define _simd_load1_ps SIMD::broadcast_ss
-#define _simd_loadu_ps SIMD::loadu_ps
-#define _simd_setzero_ps SIMD::setzero_ps
-#define _simd_set1_ps SIMD::set1_ps
-#define _simd_blend_ps(a, b, i) SIMD::blend_ps<i>(a, b)
-#define _simd_blend_epi32(a, b, i) SIMD::blend_epi32<i>(a, b)
-#define _simd_blendv_ps SIMD::blendv_ps
-#define _simd_store_ps SIMD::store_ps
-#define _simd_mul_ps SIMD::mul_ps
-#define _simd_add_ps SIMD::add_ps
-#define _simd_sub_ps SIMD::sub_ps
-#define _simd_rsqrt_ps SIMD::rsqrt_ps
-#define _simd_min_ps SIMD::min_ps
-#define _simd_max_ps SIMD::max_ps
-#define _simd_movemask_ps SIMD::movemask_ps
-#define _simd_cvtps_epi32 SIMD::cvtps_epi32
-#define _simd_cvttps_epi32 SIMD::cvttps_epi32
-#define _simd_cvtepi32_ps SIMD::cvtepi32_ps
-#define _simd_cmplt_ps SIMD::cmplt_ps
-#define _simd_cmpgt_ps SIMD::cmpgt_ps
-#define _simd_cmpneq_ps SIMD::cmpneq_ps
-#define _simd_cmpeq_ps SIMD::cmpeq_ps
-#define _simd_cmpge_ps SIMD::cmpge_ps
-#define _simd_cmple_ps SIMD::cmple_ps
-#define _simd_cmp_ps(a, b, imm) SIMD::cmp_ps<SIMD::CompareType(imm)>(a, b)
-#define _simd_and_ps SIMD::and_ps
-#define _simd_or_ps SIMD::or_ps
-#define _simd_rcp_ps SIMD::rcp_ps
-#define _simd_div_ps SIMD::div_ps
-#define _simd_castsi_ps SIMD::castsi_ps
-#define _simd_castps_pd SIMD::castps_pd
-#define _simd_castpd_ps SIMD::castpd_ps
-#define _simd_andnot_ps SIMD::andnot_ps
-#define _simd_round_ps(a, i) SIMD::round_ps<SIMD::RoundMode(i)>(a)
-#define _simd_castpd_ps SIMD::castpd_ps
-#define _simd_broadcast_ps(a) SIMD::broadcast_ps((SIMD128::Float const*)(a))
-#define _simd_stream_ps SIMD::stream_ps
-
-#define _simd_movemask_pd SIMD::movemask_pd
-#define _simd_castsi_pd SIMD::castsi_pd
-
-#define _simd_mul_epi32 SIMD::mul_epi32
-#define _simd_mullo_epi32 SIMD::mullo_epi32
-#define _simd_sub_epi32 SIMD::sub_epi32
-#define _simd_sub_epi64 SIMD::sub_epi64
-#define _simd_min_epi32 SIMD::min_epi32
-#define _simd_min_epu32 SIMD::min_epu32
-#define _simd_max_epi32 SIMD::max_epi32
-#define _simd_max_epu32 SIMD::max_epu32
-#define _simd_add_epi32 SIMD::add_epi32
-#define _simd_and_si SIMD::and_si
-#define _simd_andnot_si SIMD::andnot_si
-#define _simd_cmpeq_epi32 SIMD::cmpeq_epi32
-#define _simd_cmplt_epi32 SIMD::cmplt_epi32
-#define _simd_cmpgt_epi32 SIMD::cmpgt_epi32
-#define _simd_or_si SIMD::or_si
-#define _simd_xor_si SIMD::xor_si
-#define _simd_castps_si SIMD::castps_si
-#define _simd_adds_epu8 SIMD::adds_epu8
-#define _simd_subs_epu8 SIMD::subs_epu8
-#define _simd_add_epi8 SIMD::add_epi8
-#define _simd_cmpeq_epi64 SIMD::cmpeq_epi64
-#define _simd_cmpgt_epi64 SIMD::cmpgt_epi64
-#define _simd_cmpgt_epi8 SIMD::cmpgt_epi8
-#define _simd_cmpeq_epi8 SIMD::cmpeq_epi8
-#define _simd_cmpgt_epi16 SIMD::cmpgt_epi16
-#define _simd_cmpeq_epi16 SIMD::cmpeq_epi16
-#define _simd_movemask_epi8 SIMD::movemask_epi8
-#define _simd_permute_ps_i(a, i) SIMD::permute_ps<i>(a)
-#define _simd_permute_ps SIMD::permute_ps
-#define _simd_permute_epi32 SIMD::permute_epi32
-#define _simd_srlv_epi32 SIMD::srlv_epi32
-#define _simd_sllv_epi32 SIMD::sllv_epi32
-
-#define _simd_unpacklo_epi8 SIMD::unpacklo_epi8
-#define _simd_unpackhi_epi8 SIMD::unpackhi_epi8
-#define _simd_unpacklo_epi16 SIMD::unpacklo_epi16
-#define _simd_unpackhi_epi16 SIMD::unpackhi_epi16
-#define _simd_unpacklo_epi32 SIMD::unpacklo_epi32
-#define _simd_unpackhi_epi32 SIMD::unpackhi_epi32
-#define _simd_unpacklo_epi64 SIMD::unpacklo_epi64
-#define _simd_unpackhi_epi64 SIMD::unpackhi_epi64
-
-#define _simd_slli_epi32(a, i) SIMD::slli_epi32<i>(a)
-#define _simd_srai_epi32(a, i) SIMD::srai_epi32<i>(a)
-#define _simd_srli_epi32(a, i) SIMD::srli_epi32<i>(a)
-#define _simd_srlisi_ps(a, i) SIMD::srlisi_ps<i>(a)
-
-#define _simd_fmadd_ps SIMD::fmadd_ps
-#define _simd_fmsub_ps SIMD::fmsub_ps
-#define _simd_shuffle_epi8 SIMD::shuffle_epi8
-
-#define _simd_i32gather_ps(p, o, s) SIMD::i32gather_ps<SIMD::ScaleFactor(s)>(p, o)
-#define _simd_mask_i32gather_ps(r, p, o, m, s) \
- SIMD::mask_i32gather_ps<SIMD::ScaleFactor(s)>(r, p, o, m)
-#define _simd_abs_epi32 SIMD::abs_epi32
-
-#define _simd_cvtepu8_epi16 SIMD::cvtepu8_epi16
-#define _simd_cvtepu8_epi32 SIMD::cvtepu8_epi32
-#define _simd_cvtepu16_epi32 SIMD::cvtepu16_epi32
-#define _simd_cvtepu16_epi64 SIMD::cvtepu16_epi64
-#define _simd_cvtepu32_epi64 SIMD::cvtepu32_epi64
-
-#define _simd_packus_epi16 SIMD::packus_epi16
-#define _simd_packs_epi16 SIMD::packs_epi16
-#define _simd_packus_epi32 SIMD::packus_epi32
-#define _simd_packs_epi32 SIMD::packs_epi32
-
-#define _simd_unpacklo_ps SIMD::unpacklo_ps
-#define _simd_unpackhi_ps SIMD::unpackhi_ps
-#define _simd_unpacklo_pd SIMD::unpacklo_pd
-#define _simd_unpackhi_pd SIMD::unpackhi_pd
-#define _simd_insertf128_ps SIMD::insertf128_ps
-#define _simd_insertf128_pd SIMD::insertf128_pd
-#define _simd_insertf128_si(a, b, i) SIMD::insertf128_si<i>(a, b)
-#define _simd_extractf128_ps(a, i) SIMD::extractf128_ps<i>(a)
-#define _simd_extractf128_pd(a, i) SIMD::extractf128_pd<i>(a)
-#define _simd_extractf128_si(a, i) SIMD::extractf128_si<i>(a)
-#define _simd_permute2f128_ps(a, b, i) SIMD::permute2f128_ps<i>(a, b)
-#define _simd_permute2f128_pd(a, b, i) SIMD::permute2f128_pd<i>(a, b)
-#define _simd_permute2f128_si(a, b, i) SIMD::permute2f128_si<i>(a, b)
-#define _simd_shuffle_ps(a, b, i) SIMD::shuffle_ps<i>(a, b)
-#define _simd_shuffle_pd(a, b, i) SIMD::shuffle_pd<i>(a, b)
-#define _simd_shuffle_epi32(a, b, imm8) SIMD::shuffle_epi32<imm8>(a, b)
-#define _simd_shuffle_epi64(a, b, imm8) SIMD::shuffle_epi64<imm8>(a, b)
-#define _simd_set1_epi32 SIMD::set1_epi32
-#define _simd_set_epi32 SIMD::set_epi32
-#define _simd_set_ps SIMD::set_ps
-#define _simd_set1_epi8 SIMD::set1_epi8
-#define _simd_setzero_si SIMD::setzero_si
-#define _simd_cvttps_epi32 SIMD::cvttps_epi32
-#define _simd_store_si SIMD::store_si
-#define _simd_broadcast_ss SIMD::broadcast_ss
-#define _simd_maskstore_ps SIMD::maskstore_ps
-#define _simd_load_si SIMD::load_si
-#define _simd_loadu_si SIMD::loadu_si
-#define _simd_sub_ps SIMD::sub_ps
-#define _simd_testz_ps SIMD::testz_ps
-#define _simd_testz_si SIMD::testz_si
-#define _simd_xor_ps SIMD::xor_ps
-
-#define _simd_loadu2_si SIMD::loadu2_si
-#define _simd_storeu2_si SIMD::storeu2_si
-
-#define _simd_blendv_epi32 SIMD::blendv_epi32
-#define _simd_vmask_ps SIMD::vmask_ps
-
-template <int mask>
-SIMDINLINE SIMD128::Integer _simd_blend4_epi32(SIMD128::Integer const& a, SIMD128::Integer const& b)
-{
- return SIMD128::castps_si(
- SIMD128::blend_ps<mask>(SIMD128::castsi_ps(a), SIMD128::castsi_ps(b)));
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Evaluates the plane equation vA * vX + vB * vY + vC per SIMD lane.
-/// @param vA - coefficient multiplied with vX
-/// @param vB - coefficient multiplied with vY
-/// @param vC - constant term
-/// @param vX - first plane input
-/// @param vY - second plane input
-/// @return (vB * vY) + ((vA * vX) + vC), built from two chained fmadd operations
-SIMDINLINE simdscalar vplaneps(simdscalar const& vA,
-                               simdscalar const& vB,
-                               simdscalar const& vC,
-                               simdscalar const& vX,
-                               simdscalar const& vY)
-{
-    // Inner fmadd forms vA * vX + vC; the outer one adds vB * vY on top of it.
-    return _simd_fmadd_ps(vB, vY, _simd_fmadd_ps(vA, vX, vC));
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Evaluates the plane equation vA * vX + vB * vY + vC (simd4scalar overload).
-/// @param vA - coefficient multiplied with vX
-/// @param vB - coefficient multiplied with vY
-/// @param vC - constant term
-/// @param vX - first plane input
-/// @param vY - second plane input
-/// @return (vB * vY) + ((vA * vX) + vC), built from two chained fmadd operations
-SIMDINLINE simd4scalar vplaneps(simd4scalar const& vA,
-                                simd4scalar const& vB,
-                                simd4scalar const& vC,
-                                simd4scalar const& vX,
-                                simd4scalar const& vY)
-{
-    // Inner fmadd forms vA * vX + vC; the outer one adds vB * vY on top of it.
-    return _simd128_fmadd_ps(vB, vY, _simd128_fmadd_ps(vA, vX, vC));
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Interpolates one component of one attribute from its three
-///        barycentric coefficients A, B and C.
-/// @tparam Attrib - attribute index into pInterpBuffer
-/// @tparam Comp - component index within the attribute
-/// @tparam numComponents - number of components stored per coefficient group
-/// @param vI - barycentric I
-/// @param vJ - barycentric J
-/// @param pInterpBuffer - pointer to attribute barycentric coeffs; each attribute
-///        occupies three consecutive groups (A, B, C) of numComponents floats
-/// @return A * vI + B * vJ + C * (1 - vI - vJ), or A broadcast when A == B == C
-template <UINT Attrib, UINT Comp, UINT numComponents = 4>
-static SIMDINLINE simdscalar InterpolateComponent(simdscalar const& vI,
-                                                  simdscalar const& vJ,
-                                                  const float* pInterpBuffer)
-{
-    // Start of this attribute's coefficient groups, already offset to the component.
-    const float* pCoeffA = pInterpBuffer + (Attrib * 3 * numComponents) + Comp;
-    const float* pCoeffB = pCoeffA + numComponents;
-    const float* pCoeffC = pCoeffA + numComponents * 2;
-
-    const simdscalar vA = _simd_broadcast_ss(pCoeffA);
-
-    // Ensure constant attribs are constant. Required for proper
-    // 3D resource copies.
-    const bool allCoeffsEqual = (*pCoeffA == *pCoeffB) && (*pCoeffA == *pCoeffC);
-    if (allCoeffsEqual)
-    {
-        return vA;
-    }
-
-    const simdscalar vB = _simd_broadcast_ss(pCoeffB);
-    const simdscalar vC = _simd_broadcast_ss(pCoeffC);
-
-    // Third barycentric weight: k = 1 - i - j. It scales the C coefficient.
-    const simdscalar vK = _simd_sub_ps(_simd_sub_ps(_simd_set1_ps(1.0f), vI), vJ);
-
-    return vplaneps(vA, vB, _simd_mul_ps(vK, vC), vI, vJ);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Flat-shaded interpolation of a single component: broadcasts the
-///        attribute's A coefficient to every lane.
-/// @tparam Attrib - attribute index into pInterpBuffer
-/// @tparam Comp - component index within the attribute
-/// @tparam numComponents - number of components stored per coefficient group
-/// @param pInterpBuffer - pointer to attribute barycentric coeffs
-template <UINT Attrib, UINT Comp, UINT numComponents = 4>
-static SIMDINLINE simdscalar InterpolateComponentFlat(const float* pInterpBuffer)
-{
-    return _simd_broadcast_ss(pInterpBuffer + (Attrib * 3 * numComponents) + Comp);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Flat-shaded interpolation of a single integer component: broadcasts
-///        the attribute's A coefficient to every 32-bit lane.
-/// @tparam Attrib - attribute index into pInterpBuffer
-/// @tparam Comp - component index within the attribute
-/// @tparam numComponents - number of components stored per coefficient group
-/// @param pInterpBuffer - pointer to attribute barycentric coeffs (as 32-bit integers)
-template <UINT Attrib, UINT Comp, UINT numComponents = 4>
-static SIMDINLINE simdscalari InterpolateComponentFlatInt(const uint32_t* pInterpBuffer)
-{
-    const uint32_t coeffA = *(pInterpBuffer + (Attrib * 3 * numComponents) + Comp);
-    return _simd_set1_epi32(coeffA);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Interpolates one component of one attribute from its three
-///        barycentric coefficients A, B and C (simd4scalar overload).
-/// @tparam Attrib - attribute index into pInterpBuffer
-/// @tparam Comp - component index within the attribute
-/// @tparam numComponents - number of components stored per coefficient group
-/// @param vI - barycentric I
-/// @param vJ - barycentric J
-/// @param pInterpBuffer - pointer to attribute barycentric coeffs; each attribute
-///        occupies three consecutive groups (A, B, C) of numComponents floats
-/// @return A * vI + B * vJ + C * (1 - vI - vJ), or A broadcast when A == B == C
-template <UINT Attrib, UINT Comp, UINT numComponents = 4>
-static SIMDINLINE simd4scalar InterpolateComponent(simd4scalar const& vI,
-                                                   simd4scalar const& vJ,
-                                                   const float* pInterpBuffer)
-{
-    // Start of this attribute's coefficient groups, already offset to the component.
-    const float* pCoeffA = pInterpBuffer + (Attrib * 3 * numComponents) + Comp;
-    const float* pCoeffB = pCoeffA + numComponents;
-    const float* pCoeffC = pCoeffA + numComponents * 2;
-
-    const simd4scalar vA = SIMD128::broadcast_ss(pCoeffA);
-
-    // Ensure constant attribs are constant. Required for proper
-    // 3D resource copies.
-    const bool allCoeffsEqual = (*pCoeffA == *pCoeffB) && (*pCoeffA == *pCoeffC);
-    if (allCoeffsEqual)
-    {
-        return vA;
-    }
-
-    const simd4scalar vB = SIMD128::broadcast_ss(pCoeffB);
-    const simd4scalar vC = SIMD128::broadcast_ss(pCoeffC);
-
-    // Third barycentric weight: k = 1 - i - j. It scales the C coefficient.
-    const simd4scalar vK =
-        SIMD128::sub_ps(SIMD128::sub_ps(SIMD128::set1_ps(1.0f), vI), vJ);
-
-    return vplaneps(vA, vB, SIMD128::mul_ps(vK, vC), vI, vJ);
-}
-
-/// @brief Per-lane absolute value of a simd4scalar, computed by masking off
-///        bit 31 of every 32-bit lane.
-static SIMDINLINE simd4scalar _simd128_abs_ps(simd4scalar const& a)
-{
-    const simd4scalari keepLow31 = SIMD128::set1_epi32(0x7fffffff);
-    return SIMD128::castsi_ps(SIMD128::and_si(SIMD128::castps_si(a), keepLow31));
-}
-
-/// @brief Per-lane absolute value of a simdscalar, computed by masking off
-///        bit 31 of every 32-bit lane.
-static SIMDINLINE simdscalar _simd_abs_ps(simdscalar const& a)
-{
-    const simdscalari keepLow31 = _simd_set1_epi32(0x7fffffff);
-    return _simd_castsi_ps(_simd_and_si(_simd_castps_si(a), keepLow31));
-}
-
-#include "simd16intrin.h"
-
-#endif //__SWR_SIMDINTRIN_H__
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
deleted file mode 100644
index 53793ba101c..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
+++ /dev/null
@@ -1,234 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#pragma once
-
-#include "simdlib_types.hpp"
-
-// For documentation, please see the following include...
-// #include "simdlib_interface.hpp"
-
-namespace SIMDImpl
-{
- namespace SIMD128Impl
- {
-#if SIMD_ARCH >= SIMD_ARCH_AVX
- struct AVXImpl
- {
-#define __SIMD_LIB_AVX_HPP__
-#include "simdlib_128_avx.inl"
-#undef __SIMD_LIB_AVX_HPP__
- }; // struct AVXImpl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX
-
-#if SIMD_ARCH >= SIMD_ARCH_AVX2
- struct AVX2Impl : AVXImpl
- {
-#define __SIMD_LIB_AVX2_HPP__
-#include "simdlib_128_avx2.inl"
-#undef __SIMD_LIB_AVX2_HPP__
- }; // struct AVX2Impl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX2
-
-#if SIMD_ARCH >= SIMD_ARCH_AVX512
- // 128-bit AVX512 implementation. Inherits everything from AVX2Impl; the
- // AVX512-specific .inl files are only pulled in when SIMD_OPT_128_AVX512 is
- // defined, otherwise this struct adds nothing on top of AVX2Impl.
- struct AVX512Impl : AVX2Impl
- {
-#if defined(SIMD_OPT_128_AVX512)
-#define __SIMD_LIB_AVX512_HPP__
-#include "simdlib_128_avx512.inl"
-#if defined(SIMD_ARCH_KNIGHTS)
-#include "simdlib_128_avx512_knights.inl"
-#else // optimize for core
-#include "simdlib_128_avx512_core.inl"
-#endif // defined(SIMD_ARCH_KNIGHTS)
-#undef __SIMD_LIB_AVX512_HPP__
-#endif // SIMD_OPT_128_AVX512
- }; // struct AVX512Impl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512
-
- struct Traits : SIMDImpl::Traits
- {
-#if SIMD_ARCH == SIMD_ARCH_AVX
- using IsaImpl = AVXImpl;
-#elif SIMD_ARCH == SIMD_ARCH_AVX2
- using IsaImpl = AVX2Impl;
-#elif SIMD_ARCH == SIMD_ARCH_AVX512
- using IsaImpl = AVX512Impl;
-#else
-#error Invalid value for SIMD_ARCH
-#endif
-
- using Float = SIMD128Impl::Float;
- using Double = SIMD128Impl::Double;
- using Integer = SIMD128Impl::Integer;
- using Vec4 = SIMD128Impl::Vec4;
- using Mask = SIMD128Impl::Mask;
- };
- } // namespace SIMD128Impl
-
- namespace SIMD256Impl
- {
-#if SIMD_ARCH >= SIMD_ARCH_AVX
- struct AVXImpl
- {
-#define __SIMD_LIB_AVX_HPP__
-#include "simdlib_256_avx.inl"
-#undef __SIMD_LIB_AVX_HPP__
- }; // struct AVXImpl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX
-
-#if SIMD_ARCH >= SIMD_ARCH_AVX2
- struct AVX2Impl : AVXImpl
- {
-#define __SIMD_LIB_AVX2_HPP__
-#include "simdlib_256_avx2.inl"
-#undef __SIMD_LIB_AVX2_HPP__
- }; // struct AVX2Impl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX2
-
-#if SIMD_ARCH >= SIMD_ARCH_AVX512
- // 256-bit AVX512 implementation. Inherits everything from AVX2Impl; the
- // AVX512-specific .inl files are only pulled in when SIMD_OPT_256_AVX512 is
- // defined, otherwise this struct adds nothing on top of AVX2Impl.
- struct AVX512Impl : AVX2Impl
- {
-#if defined(SIMD_OPT_256_AVX512)
-#define __SIMD_LIB_AVX512_HPP__
-#include "simdlib_256_avx512.inl"
-#if defined(SIMD_ARCH_KNIGHTS)
-#include "simdlib_256_avx512_knights.inl"
-#else // optimize for core
-#include "simdlib_256_avx512_core.inl"
-#endif // defined(SIMD_ARCH_KNIGHTS)
-#undef __SIMD_LIB_AVX512_HPP__
-#endif // SIMD_OPT_256_AVX512
- }; // struct AVX512Impl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512
-
- struct Traits : SIMDImpl::Traits
- {
-#if SIMD_ARCH == SIMD_ARCH_AVX
- using IsaImpl = AVXImpl;
-#elif SIMD_ARCH == SIMD_ARCH_AVX2
- using IsaImpl = AVX2Impl;
-#elif SIMD_ARCH == SIMD_ARCH_AVX512
- using IsaImpl = AVX512Impl;
-#else
-#error Invalid value for SIMD_ARCH
-#endif
-
- using Float = SIMD256Impl::Float;
- using Double = SIMD256Impl::Double;
- using Integer = SIMD256Impl::Integer;
- using Vec4 = SIMD256Impl::Vec4;
- using Mask = SIMD256Impl::Mask;
- };
- } // namespace SIMD256Impl
-
- namespace SIMD512Impl
- {
-#if SIMD_ARCH >= SIMD_ARCH_AVX
- template <typename SIMD256T>
- struct AVXImplBase
- {
-#define __SIMD_LIB_AVX_HPP__
-#include "simdlib_512_emu.inl"
-#include "simdlib_512_emu_masks.inl"
-#undef __SIMD_LIB_AVX_HPP__
- }; // struct AVXImplBase
- using AVXImpl = AVXImplBase<SIMD256Impl::AVXImpl>;
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX
-
-#if SIMD_ARCH >= SIMD_ARCH_AVX2
- using AVX2Impl = AVXImplBase<SIMD256Impl::AVX2Impl>;
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX2
-
-#if SIMD_ARCH >= SIMD_ARCH_AVX512
- // 512-bit AVX512 implementation. Derives from the 512-bit emulation layer
- // instantiated over the 256-bit AVX512 implementation, then includes the
- // AVX512 .inl files (generic plus either the Knights or the core variants).
- struct AVX512Impl : AVXImplBase<SIMD256Impl::AVX512Impl>
- {
-#define __SIMD_LIB_AVX512_HPP__
-#include "simdlib_512_avx512.inl"
-#include "simdlib_512_avx512_masks.inl"
-#if defined(SIMD_ARCH_KNIGHTS)
-#include "simdlib_512_avx512_knights.inl"
-#include "simdlib_512_avx512_masks_knights.inl"
-#else // optimize for core
-#include "simdlib_512_avx512_core.inl"
-#include "simdlib_512_avx512_masks_core.inl"
-#endif // defined(SIMD_ARCH_KNIGHTS)
-#undef __SIMD_LIB_AVX512_HPP__
- }; // struct AVX512Impl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512
-
- struct Traits : SIMDImpl::Traits
- {
-#if SIMD_ARCH == SIMD_ARCH_AVX
- using IsaImpl = AVXImpl;
-#elif SIMD_ARCH == SIMD_ARCH_AVX2
- using IsaImpl = AVX2Impl;
-#elif SIMD_ARCH == SIMD_ARCH_AVX512
- using IsaImpl = AVX512Impl;
-#else
-#error Invalid value for SIMD_ARCH
-#endif
-
- using Float = SIMD512Impl::Float;
- using Double = SIMD512Impl::Double;
- using Integer = SIMD512Impl::Integer;
- using Vec4 = SIMD512Impl::Vec4;
- using Mask = SIMD512Impl::Mask;
- };
- } // namespace SIMD512Impl
-} // namespace SIMDImpl
-
-// Front-end type for one SIMD width. It inherits the ISA implementation chosen
-// by Traits::IsaImpl and re-exports the associated types of Traits as members.
-template <typename Traits>
-struct SIMDBase : Traits::IsaImpl
-{
- using CompareType = typename Traits::CompareType;
- using ScaleFactor = typename Traits::ScaleFactor;
- using RoundMode = typename Traits::RoundMode;
- // Alias for the ISA implementation this struct derives from.
- using SIMD = typename Traits::IsaImpl;
- using Float = typename Traits::Float;
- using Double = typename Traits::Double;
- using Integer = typename Traits::Integer;
- using Vec4 = typename Traits::Vec4;
- using Mask = typename Traits::Mask;
-}; // struct SIMDBase
-
-using SIMD128 = SIMDBase<SIMDImpl::SIMD128Impl::Traits>;
-using SIMD256 = SIMDBase<SIMDImpl::SIMD256Impl::Traits>;
-using SIMD512 = SIMDBase<SIMDImpl::SIMD512Impl::Traits>;
-
-template <typename SIMD_T>
-using CompareType = typename SIMD_T::CompareType;
-template <typename SIMD_T>
-using ScaleFactor = typename SIMD_T::ScaleFactor;
-template <typename SIMD_T>
-using RoundMode = typename SIMD_T::RoundMode;
-template <typename SIMD_T>
-using Float = typename SIMD_T::Float;
-template <typename SIMD_T>
-using Double = typename SIMD_T::Double;
-template <typename SIMD_T>
-using Integer = typename SIMD_T::Integer;
-template <typename SIMD_T>
-using Vec4 = typename SIMD_T::Vec4;
-template <typename SIMD_T>
-using Mask = typename SIMD_T::Mask;
-
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl
deleted file mode 100644
index 83ce967373c..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl
+++ /dev/null
@@ -1,593 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-//============================================================================
-// SIMD128 AVX (1) implementation
-//============================================================================
-
-#define SIMD_WRAPPER_1(op) \
- static SIMDINLINE Float SIMDCALL op(Float a) { return _mm_##op(a); }
-
-#define SIMD_WRAPPER_2(op) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) { return _mm_##op(a, b); }
-
-#define SIMD_DWRAPPER_2(op) \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) { return _mm_##op(a, b); }
-
-#define SIMD_WRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- { \
- return _mm_##op(a, b, ImmT); \
- }
-
-#define SIMD_DWRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- { \
- return _mm_##op(a, b, ImmT); \
- }
-
-#define SIMD_WRAPPER_3(op) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm_##op(a, b, c); }
-
-#define SIMD_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) { return _mm_##op(a); }
-
-#define SIMD_IWRAPPER_1I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- { \
- return intrin(a, ImmT); \
- }
-#define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm_##op)
-
-#define SIMD_IWRAPPER_2_(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return intrin(a, b); }
-
-#define SIMD_IWRAPPER_2(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return _mm_##op(a, b); }
-
-#define SIMD_IFWRAPPER_2(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return castps_si(intrin(castsi_ps(a), castsi_ps(b))); \
- }
-
-#define SIMD_IWRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return _mm_##op(a, b, ImmT); \
- }
-
-//-----------------------------------------------------------------------
-// Single precision floating point arithmetic operations
-//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps); // return a + b
-SIMD_WRAPPER_2(div_ps); // return a / b
-SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps); // return a * b
-SIMD_WRAPPER_1(rcp_ps); // return 1.0f / a
-SIMD_WRAPPER_1(rsqrt_ps); // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps); // return a - b
-
-static SIMDINLINE Float SIMDCALL fmadd_ps(Float a, Float b, Float c) // return (a * b) + c
-{
- return add_ps(mul_ps(a, b), c);
-}
-static SIMDINLINE Float SIMDCALL fmsub_ps(Float a, Float b, Float c) // return (a * b) - c
-{
- return sub_ps(mul_ps(a, b), c);
-}
-
-template <RoundMode RMT>
-static SIMDINLINE Float SIMDCALL round_ps(Float a)
-{
- return _mm_round_ps(a, static_cast<int>(RMT));
-}
-
-static SIMDINLINE Float SIMDCALL ceil_ps(Float a)
-{
- return round_ps<RoundMode::CEIL_NOEXC>(a);
-}
-static SIMDINLINE Float SIMDCALL floor_ps(Float a)
-{
- return round_ps<RoundMode::FLOOR_NOEXC>(a);
-}
-
-//-----------------------------------------------------------------------
-// Integer (various width) arithmetic operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
-SIMD_IWRAPPER_2(add_epi32); // return a + b (int32)
-SIMD_IWRAPPER_2(add_epi8); // return a + b (int8)
-SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
-SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
-SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
-SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
-SIMD_IWRAPPER_2(min_epu32); // return (a < b) ? a : b (uint32)
-SIMD_IWRAPPER_2(mul_epi32); // return a * b (int32)
-
-// return (a * b) & 0xFFFFFFFF
-//
-// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
-// and store the low 32 bits of the intermediate integers in dst.
-SIMD_IWRAPPER_2(mullo_epi32);
-SIMD_IWRAPPER_2(sub_epi32); // return a - b (int32)
-SIMD_IWRAPPER_2(sub_epi64); // return a - b (int64)
-SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
-
-//-----------------------------------------------------------------------
-// Logical operations
-//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int)
-SIMD_IWRAPPER_2_(and_si, _mm_and_si128); // return a & b (int)
-SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int)
-SIMD_IWRAPPER_2_(andnot_si, _mm_andnot_si128); // return (~a) & b (int)
-SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int)
-SIMD_IWRAPPER_2_(or_si, _mm_or_si128); // return a | b (int)
-SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int)
-SIMD_IWRAPPER_2_(xor_si, _mm_xor_si128); // return a ^ b (int)
-
-//-----------------------------------------------------------------------
-// Shift operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I(slli_epi32); // return a << ImmT
-SIMD_IWRAPPER_1I(slli_epi64); // return a << ImmT
-
-// return a << b (uint32), per 32-bit lane
-//
-// AVX (1) has no per-lane variable shift, so it is emulated one lane at a time.
-// Lanes are processed as uint32_t: left-shifting a negative int32_t, or shifting
-// by 32 or more, is undefined behaviour in C++. A count above 31 produces 0,
-// which is what the native _mm_sllv_epi32 used by the AVX2 implementation does.
-static SIMDINLINE Integer SIMDCALL sllv_epi32(Integer vA, Integer vB)
-{
-    uint32_t lanes[4];
-    uint32_t counts[4];
-    _mm_storeu_si128(reinterpret_cast<__m128i*>(lanes), vA);
-    _mm_storeu_si128(reinterpret_cast<__m128i*>(counts), vB);
-
-    for (uint32_t i = 0; i < 4; ++i)
-    {
-        lanes[i] = (counts[i] < 32) ? (lanes[i] << counts[i]) : 0;
-    }
-
-    return _mm_loadu_si128(reinterpret_cast<__m128i const*>(lanes));
-}
-
-SIMD_IWRAPPER_1I(srai_epi32); // return a >> ImmT (int32)
-SIMD_IWRAPPER_1I(srli_epi32); // return a >> ImmT (uint32)
-SIMD_IWRAPPER_1I_(srli_si, _mm_srli_si128); // return a >> (ImmT*8) (uint)
-
-static SIMDINLINE Integer SIMDCALL srl_epi64(Integer a, Integer n)
-{
- return _mm_srl_epi64(a, n);
-}
-
-template <int ImmT> // same as srli_si, but with Float cast to int
-static SIMDINLINE Float SIMDCALL srlisi_ps(Float a)
-{
- return castsi_ps(srli_si<ImmT>(castps_si(a)));
-}
-
-// return a >> b (uint32), per 32-bit lane, shifting in zeros
-//
-// AVX (1) has no per-lane variable shift, so it is emulated one lane at a time.
-// Lanes are processed as uint32_t so the shift is logical; shifting an int32_t
-// would sign-extend negative values. A count above 31 produces 0 instead of
-// invoking undefined behaviour, which is what the native _mm_srlv_epi32 used by
-// the AVX2 implementation does.
-static SIMDINLINE Integer SIMDCALL srlv_epi32(Integer vA, Integer vB)
-{
-    uint32_t lanes[4];
-    uint32_t counts[4];
-    _mm_storeu_si128(reinterpret_cast<__m128i*>(lanes), vA);
-    _mm_storeu_si128(reinterpret_cast<__m128i*>(counts), vB);
-
-    for (uint32_t i = 0; i < 4; ++i)
-    {
-        lanes[i] = (counts[i] < 32) ? (lanes[i] >> counts[i]) : 0;
-    }
-
-    return _mm_loadu_si128(reinterpret_cast<__m128i const*>(lanes));
-}
-
-//-----------------------------------------------------------------------
-// Conversion operations
-//-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL castpd_ps(Double a) // return *(Float*)(&a)
-{
- return _mm_castpd_ps(a);
-}
-
-static SIMDINLINE Integer SIMDCALL castps_si(Float a) // return *(Integer*)(&a)
-{
- return _mm_castps_si128(a);
-}
-
-static SIMDINLINE Double SIMDCALL castsi_pd(Integer a) // return *(Double*)(&a)
-{
- return _mm_castsi128_pd(a);
-}
-
-static SIMDINLINE Double SIMDCALL castps_pd(Float a) // return *(Double*)(&a)
-{
- return _mm_castps_pd(a);
-}
-
-static SIMDINLINE Float SIMDCALL castsi_ps(Integer a) // return *(Float*)(&a)
-{
- return _mm_castsi128_ps(a);
-}
-
-static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer a) // return (float)a (int32 --> float)
-{
- return _mm_cvtepi32_ps(a);
-}
-
-static SIMDINLINE int32_t SIMDCALL cvtsi128_si32(Integer a) // return a.v[0]
-{
- return _mm_cvtsi128_si32(a);
-}
-
-static SIMDINLINE Integer SIMDCALL cvtsi32_si128(int32_t n) // return a[0] = n, a[1]...a[3] = 0
-{
- return _mm_cvtsi32_si128(n);
-}
-
-SIMD_IWRAPPER_1(cvtepu8_epi16); // return (int16)a (uint8 --> int16)
-SIMD_IWRAPPER_1(cvtepu8_epi32); // return (int32)a (uint8 --> int32)
-SIMD_IWRAPPER_1(cvtepu16_epi32); // return (int32)a (uint16 --> int32)
-SIMD_IWRAPPER_1(cvtepu16_epi64); // return (int64)a (uint16 --> int64)
-SIMD_IWRAPPER_1(cvtepu32_epi64); // return (int64)a (uint32 --> int64)
-
-static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float a) // return (int32)a (float --> int32)
-{
- return _mm_cvtps_epi32(a);
-}
-
-static SIMDINLINE Integer SIMDCALL
- cvttps_epi32(Float a) // return (int32)a (rnd_to_zero(float) --> int32)
-{
- return _mm_cvttps_epi32(a);
-}
-
-//-----------------------------------------------------------------------
-// Comparison operations
-//-----------------------------------------------------------------------
-template <CompareType CmpTypeT>
-static SIMDINLINE Float SIMDCALL cmp_ps(Float a, Float b) // return a (CmpTypeT) b
-{
- return _mm_cmp_ps(a, b, static_cast<const int>(CmpTypeT));
-}
-static SIMDINLINE Float SIMDCALL cmplt_ps(Float a, Float b)
-{
- return cmp_ps<CompareType::LT_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpgt_ps(Float a, Float b)
-{
- return cmp_ps<CompareType::GT_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpneq_ps(Float a, Float b)
-{
- return cmp_ps<CompareType::NEQ_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpeq_ps(Float a, Float b)
-{
- return cmp_ps<CompareType::EQ_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float b)
-{
- return cmp_ps<CompareType::GE_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmple_ps(Float a, Float b)
-{
- return cmp_ps<CompareType::LE_OQ>(a, b);
-}
-
-SIMD_IWRAPPER_2(cmpeq_epi8); // return a == b (int8)
-SIMD_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
-SIMD_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
-SIMD_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
-SIMD_IWRAPPER_2(cmpgt_epi8); // return a > b (int8)
-SIMD_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
-SIMD_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
-SIMD_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
-SIMD_IWRAPPER_2(cmplt_epi32); // return a < b (int32)
-
-static SIMDINLINE bool SIMDCALL testz_ps(Float a,
- Float b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
-{
- return 0 != _mm_testz_ps(a, b);
-}
-
-static SIMDINLINE bool SIMDCALL testz_si(Integer a,
- Integer b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
-{
- return 0 != _mm_testz_si128(a, b);
-}
-
-//-----------------------------------------------------------------------
-// Blend / shuffle / permute operations
-//-----------------------------------------------------------------------
-SIMD_WRAPPER_2I(blend_ps); // return ImmT ? b : a (float)
-SIMD_WRAPPER_3(blendv_ps); // return mask ? b : a (float)
-
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a,
- Integer b,
- Float mask) // return mask ? b : a (int)
-{
- return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), mask));
-}
-
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a,
- Integer b,
- Integer mask) // return mask ? b : a (int)
-{
- return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), castsi_ps(mask)));
-}
-
-static SIMDINLINE Float SIMDCALL
- broadcast_ss(float const* p) // return *p (all elements in vector get same value)
-{
- return _mm_broadcast_ss(p);
-}
-
-SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm_packs_epi16 and _mm512_packs_epi16
-SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm_packs_epi32 and _mm512_packs_epi32
-SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm_packus_epi16 and _mm512_packus_epi16
-SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm_packus_epi32 and _mm512_packus_epi32
-
-// Integer variant of permute_ps: the lanes are reinterpreted as floats for
-// _mm_permutevar_ps and the result is cast back to integer.
-static SIMDINLINE Integer SIMDCALL
- permute_epi32(Integer a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (int)
-{
- return castps_si(_mm_permutevar_ps(castsi_ps(a), swiz));
-}
-
-static SIMDINLINE Float SIMDCALL
- permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
-{
- return _mm_permutevar_ps(a, swiz);
-}
-
-SIMD_IWRAPPER_1I(shuffle_epi32);
-
-template <int ImmT>
-static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b) = delete;
-
-SIMD_IWRAPPER_2(shuffle_epi8);
-SIMD_DWRAPPER_2I(shuffle_pd);
-SIMD_WRAPPER_2I(shuffle_ps);
-SIMD_IWRAPPER_2(unpackhi_epi16);
-
-// SIMD_IFWRAPPER_2(unpackhi_epi32, _mm_unpackhi_ps);
-static SIMDINLINE Integer SIMDCALL unpackhi_epi32(Integer a, Integer b)
-{
- return castps_si(_mm_unpackhi_ps(castsi_ps(a), castsi_ps(b)));
-}
-
-SIMD_IWRAPPER_2(unpackhi_epi64);
-SIMD_IWRAPPER_2(unpackhi_epi8);
-SIMD_DWRAPPER_2(unpackhi_pd);
-SIMD_WRAPPER_2(unpackhi_ps);
-SIMD_IWRAPPER_2(unpacklo_epi16);
-SIMD_IFWRAPPER_2(unpacklo_epi32, _mm_unpacklo_ps);
-SIMD_IWRAPPER_2(unpacklo_epi64);
-SIMD_IWRAPPER_2(unpacklo_epi8);
-SIMD_DWRAPPER_2(unpacklo_pd);
-SIMD_WRAPPER_2(unpacklo_ps);
-
-//-----------------------------------------------------------------------
-// Load / store operations
-//-----------------------------------------------------------------------
-// Software gather: AVX (1) has no gather instruction, so each lane is loaded
-// individually from the byte address p + idx[lane] * ScaleT.
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
-    i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
-{
-    uint8_t const* const pBase = (uint8_t const*)p;
-    uint32_t* const      pIdx  = (uint32_t*)&idx;
-
-    Float        vGathered;
-    float* const pLanes = (float*)&vGathered;
-
-    for (uint32_t lane = 0; lane < SIMD_WIDTH; ++lane)
-    {
-        // Offsets are treated as unsigned and scaled to bytes.
-        uint32_t const byteOffset = pIdx[lane] * static_cast<uint32_t>(ScaleT);
-        pLanes[lane]              = *(float const*)(pBase + byteOffset);
-    }
-
-    return vGathered;
-}
-
-static SIMDINLINE Float SIMDCALL
- load1_ps(float const* p) // return *p (broadcast 1 value to all elements)
-{
- return broadcast_ss(p);
-}
-
-static SIMDINLINE Float SIMDCALL
- load_ps(float const* p) // return *p (loads SIMD width elements from memory)
-{
- return _mm_load_ps(p);
-}
-
-static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
-{
- return _mm_load_si128(&p->v);
-}
-
-static SIMDINLINE Float SIMDCALL
- loadu_ps(float const* p) // return *p (same as load_ps but allows for unaligned mem)
-{
- return _mm_loadu_ps(p);
-}
-
-static SIMDINLINE Integer SIMDCALL
- loadu_si(Integer const* p) // return *p (same as load_si but allows for unaligned mem)
-{
- return _mm_lddqu_si128(&p->v);
-}
-
-// Masked software gather.
-// for each element: (mask & (1 << 31)) ? i32gather_ps<ScaleT>(p, idx) : old
-// Only lanes whose mask sign bit is set are read from memory; all other lanes
-// keep the value from 'old'.
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
-    mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
-{
-    uint8_t const* const pBase = (uint8_t const*)p;
-    uint32_t* const      pIdx  = (uint32_t*)&idx;
-
-    Float        vGathered = old;
-    float* const pLanes    = (float*)&vGathered;
-
-    // Walk the set bits of the lane mask from lowest to highest, clearing each
-    // bit once its lane has been loaded.
-    unsigned long lane;
-    for (uint32_t pending = movemask_ps(mask); _BitScanForward(&lane, pending);
-         pending &= ~(1 << lane))
-    {
-        uint32_t const byteOffset = pIdx[lane] * static_cast<uint32_t>(ScaleT);
-        pLanes[lane]              = *(float const*)(pBase + byteOffset);
-    }
-
-    return vGathered;
-}
-
-static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer mask, Float src)
-{
- _mm_maskstore_ps(p, mask, src);
-}
-
-static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
-{
- return static_cast<uint32_t>(_mm_movemask_epi8(a));
-}
-
-static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double a)
-{
- return static_cast<uint32_t>(_mm_movemask_pd(a));
-}
-static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float a)
-{
- return static_cast<uint32_t>(_mm_movemask_ps(a));
-}
-
-static SIMDINLINE Integer SIMDCALL set1_epi32(int i) // return i (all elements are same value)
-{
- return _mm_set1_epi32(i);
-}
-
-static SIMDINLINE Integer SIMDCALL set1_epi8(char i) // return i (all elements are same value)
-{
- return _mm_set1_epi8(i);
-}
-
-static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
-{
- return _mm_set1_ps(f);
-}
-
-static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
-{
- return _mm_setzero_ps();
-}
-
-static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
-{
- return _mm_setzero_si128();
-}
-
-static SIMDINLINE void SIMDCALL
- store_ps(float* p, Float a) // *p = a (stores all elements contiguously in memory)
-{
- _mm_store_ps(p, a);
-}
-
-static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer a) // *p = a
-{
- _mm_store_si128(&p->v, a);
-}
-
-static SIMDINLINE void SIMDCALL
- storeu_si(Integer* p, Integer a) // *p = a (same as store_si but allows for unaligned mem)
-{
- _mm_storeu_si128(&p->v, a);
-}
-
-static SIMDINLINE void SIMDCALL
- stream_ps(float* p, Float a) // *p = a (same as store_ps, but doesn't keep memory in cache)
-{
- _mm_stream_ps(p, a);
-}
-
-static SIMDINLINE Float SIMDCALL set_ps(float in3, float in2, float in1, float in0)
-{
- return _mm_set_ps(in3, in2, in1, in0);
-}
-
-static SIMDINLINE Integer SIMDCALL set_epi32(int in3, int in2, int in1, int in0)
-{
- return _mm_set_epi32(in3, in2, in1, in0);
-}
-
-// return a[ImmT & 3] as a float
-//
-// _mm_extract_ps returns the raw bits as an int, and reading that int through a
-// float* violates strict aliasing. Instead, shuffle the selected lane into lane 0
-// (bits [1:0] of the shuffle immediate pick the source of lane 0) and read it
-// with _mm_cvtss_f32, which keeps the value in a float all the way.
-template <int ImmT>
-static SIMDINLINE float SIMDCALL extract_ps(Float a)
-{
-    return _mm_cvtss_f32(_mm_shuffle_ps(a, a, ImmT & 0x3));
-}
-
-// Expands the low 4 bits of 'mask' into a vector mask: lane i is all ones when
-// bit i of 'mask' is set and all zeros otherwise.
-static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
-{
-    // Lane i isolates bit i of the broadcast mask.
-    const Integer laneBits = set_epi32(0x08, 0x04, 0x02, 0x01);
-    const Integer selected = and_si(set1_epi32(mask), laneBits);
-
-    // 0 < selected holds exactly in the lanes whose bit survived the AND.
-    return castsi_ps(cmplt_epi32(setzero_si(), selected));
-}
-
-// Remove every helper macro defined at the top of this file so the next .inl
-// included by simdlib.hpp can define its own versions.
-#undef SIMD_WRAPPER_1
-#undef SIMD_WRAPPER_2
-#undef SIMD_DWRAPPER_2
-#undef SIMD_WRAPPER_2I
-#undef SIMD_DWRAPPER_2I
-#undef SIMD_WRAPPER_3
-#undef SIMD_IWRAPPER_1
-#undef SIMD_IWRAPPER_1I_
-#undef SIMD_IWRAPPER_1I
-#undef SIMD_IWRAPPER_2_
-#undef SIMD_IWRAPPER_2
-#undef SIMD_IFWRAPPER_2
-#undef SIMD_IWRAPPER_2I
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx2.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx2.inl
deleted file mode 100644
index 0da66ebb56c..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx2.inl
+++ /dev/null
@@ -1,66 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX2_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-//============================================================================
-// SIMD4 AVX (2) implementation
-//
-// Since this implementation inherits from the AVX (1) implementation,
-// the only operations below are ones that replace AVX (1) operations.
-// Only 2 shifts and 2 gathers were introduced with AVX 2
-// Also, add native support for FMA operations
-//============================================================================
-#define SIMD_WRAPPER_3(op) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm_##op(a, b, c); } // op(a, b, c) forwards to the intrinsic _mm_<op>
-
-SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c, via _mm_fmadd_ps
-SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c, via _mm_fmsub_ps
-
-static SIMDINLINE Integer SIMDCALL sllv_epi32(Integer vA, Integer vB) // return vA << vB per 32-bit lane; each lane uses its own shift count (uint32)
-{
- return _mm_sllv_epi32(vA, vB);
-}
-
-static SIMDINLINE Integer SIMDCALL srlv_epi32(Integer vA, Integer vB) // return vA >> vB per 32-bit lane, logical shift; each lane uses its own count (uint32)
-{
- return _mm_srlv_epi32(vA, vB);
-}
-
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- i32gather_ps(float const* p, Integer idx) // per lane: return *(float*)(((int8*)p) + (idx * ScaleT)); ScaleT is passed as the intrinsic's byte scale
-{
- return _mm_i32gather_ps(p, idx, static_cast<const int>(ScaleT));
-}
-
-// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask) // mask is taken by value, so the caller's mask is not modified
-{
- return _mm_mask_i32gather_ps(old, p, idx, mask, static_cast<const int>(ScaleT));
-}
-
-#undef SIMD_WRAPPER_3 // the helper macro defined at the top of this file is removed again
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl
deleted file mode 100644
index b076daa080a..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl
+++ /dev/null
@@ -1,368 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX512_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-//============================================================================
-// SIMD128 AVX (512) implementation
-//
-// Since this implementation inherits from the AVX (2) implementation,
-// the only operations below are ones that replace AVX (2) operations.
-// These use native AVX512 instructions with masking to enable a larger
-// register set.
-//============================================================================
-
-private:
-static SIMDINLINE __m512 __conv(Float r) // 128-bit Float -> __m512 via cast intrinsic (no data conversion)
-{
- return _mm512_castps128_ps512(r.v);
-}
-static SIMDINLINE __m512d __conv(Double r) // 128-bit Double -> __m512d via cast intrinsic
-{
- return _mm512_castpd128_pd512(r.v);
-}
-static SIMDINLINE __m512i __conv(Integer r) // 128-bit Integer -> __m512i via cast intrinsic
-{
- return _mm512_castsi128_si512(r.v);
-}
-static SIMDINLINE Float __conv(__m512 r) // __m512 -> Float: keeps only the low 128 bits
-{
- return _mm512_castps512_ps128(r);
-}
-static SIMDINLINE Double __conv(__m512d r) // __m512d -> Double: keeps only the low 128 bits
-{
- return _mm512_castpd512_pd128(r);
-}
-static SIMDINLINE Integer __conv(__m512i r) // __m512i -> Integer: keeps only the low 128 bits
-{
- return _mm512_castsi512_si128(r);
-}
-
-public:
-#define SIMD_WRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
- }
-#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xf)) // unary Float op; mask 0xf selects the 4 low 32-bit lanes
-
-#define SIMD_WRAPPER_1I_(op, intrin, mask) \
- template <int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
- }
-#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xf)) // unary Float op with a compile-time immediate ImmT
-
-#define SIMD_WRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
- }
-#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xf)) // binary Float op on the 4 low lanes
-
-#define SIMD_WRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- { \
- return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
- } // binary Float op with immediate; mask fixed to 0xf and intrinsic name taken from op
-
-#define SIMD_WRAPPER_3_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c))); \
- }
-#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xf)) // ternary Float op on the 4 low lanes
-
-#define SIMD_DWRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- { \
- return __conv(_mm512_maskz_##op(0x3, __conv(a), __conv(b), ImmT)); \
- } // binary Double op with immediate; mask 0x3 selects the 2 low 64-bit lanes
-
-#define SIMD_IWRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
- }
-#define SIMD_IWRAPPER_1_32(op) SIMD_IWRAPPER_1_(op, op, __mmask16(0xf)) // unary Integer op on 4 x 32-bit lanes
-
-#define SIMD_IWRAPPER_1I_(op, intrin, mask) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
- }
-#define SIMD_IWRAPPER_1I_32(op) SIMD_IWRAPPER_1I_(op, op, __mmask16(0xf)) // unary Integer op with immediate on 4 x 32-bit lanes
-
-#define SIMD_IWRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
- }
-#define SIMD_IWRAPPER_2_32(op) SIMD_IWRAPPER_2_(op, op, __mmask16(0xf)) // binary Integer op on 4 x 32-bit lanes
-
-#define SIMD_IWRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
- } // binary Integer op with immediate; mask fixed to 0xf
-
-//-----------------------------------------------------------------------
-// Single precision floating point arithmetic operations
-//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps); // return a + b
-SIMD_WRAPPER_2(div_ps); // return a / b
-SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
-SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
-SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps); // return a * b
-SIMD_WRAPPER_1_(rcp_ps, rcp14_ps, __mmask16(0xf)); // return approximately 1.0f / a (uses the rcp14 intrinsic, not an exact divide)
-SIMD_WRAPPER_1_(rsqrt_ps, rsqrt14_ps, __mmask16(0xf)); // return approximately 1.0f / sqrt(a) (uses the rsqrt14 intrinsic)
-SIMD_WRAPPER_2(sub_ps); // return a - b
-
-//-----------------------------------------------------------------------
-// Integer (various width) arithmetic operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1_32(abs_epi32); // return absolute_value(a) (int32)
-SIMD_IWRAPPER_2_32(add_epi32); // return a + b (int32)
-SIMD_IWRAPPER_2_32(max_epi32); // return (a > b) ? a : b (int32)
-SIMD_IWRAPPER_2_32(max_epu32); // return (a > b) ? a : b (uint32)
-SIMD_IWRAPPER_2_32(min_epi32); // return (a < b) ? a : b (int32)
-SIMD_IWRAPPER_2_32(min_epu32); // return (a < b) ? a : b (uint32)
-SIMD_IWRAPPER_2_32(mul_epi32); // return a * b: the intrinsic multiplies the low int32 of each 64-bit element into a 64-bit product
-
-// SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8)
-// SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
-
-// return (a * b) & 0xFFFFFFFF
-//
-// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
-// and store the low 32 bits of each intermediate integer in the result.
-SIMD_IWRAPPER_2_32(mullo_epi32);
-SIMD_IWRAPPER_2_32(sub_epi32); // return a - b (int32)
-
-// SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64)
-// SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
-
-//-----------------------------------------------------------------------
-// Logical operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_2_(and_si, and_epi32, __mmask16(0xf)); // return a & b (int), via the 32-bit-lane and_epi32 intrinsic
-SIMD_IWRAPPER_2_(andnot_si, andnot_epi32, __mmask16(0xf)); // return (~a) & b (int), via andnot_epi32
-SIMD_IWRAPPER_2_(or_si, or_epi32, __mmask16(0xf)); // return a | b (int), via or_epi32
-SIMD_IWRAPPER_2_(xor_si, xor_epi32, __mmask16(0xf)); // return a ^ b (int), via xor_epi32
-
-//-----------------------------------------------------------------------
-// Shift operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I_32(slli_epi32); // return a << ImmT (same count for every 32-bit lane)
-SIMD_IWRAPPER_2_32(sllv_epi32); // return a << b (uint32, per-lane shift count)
-SIMD_IWRAPPER_1I_32(srai_epi32); // return a >> ImmT (int32, arithmetic shift)
-SIMD_IWRAPPER_1I_32(srli_epi32); // return a >> ImmT (uint32, logical shift)
-SIMD_IWRAPPER_2_32(srlv_epi32); // return a >> b (uint32, per-lane shift count)
-
-// use AVX2 version
-// SIMD_IWRAPPER_1I_(srli_si, srli_si256); // return a >> (ImmT*8) (uint)
-
-//-----------------------------------------------------------------------
-// Conversion operations (Use AVX2 versions)
-//-----------------------------------------------------------------------
-// SIMD_IWRAPPER_1L(cvtepu8_epi16, 0xffff); // return (int16)a (uint8 --> int16)
-// SIMD_IWRAPPER_1L(cvtepu8_epi32, 0xff); // return (int32)a (uint8 --> int32)
-// SIMD_IWRAPPER_1L(cvtepu16_epi32, 0xff); // return (int32)a (uint16 --> int32)
-// SIMD_IWRAPPER_1L(cvtepu16_epi64, 0xf); // return (int64)a (uint16 --> int64)
-// SIMD_IWRAPPER_1L(cvtepu32_epi64, 0xf); // return (int64)a (uint32 --> int64)
-
-//-----------------------------------------------------------------------
-// Comparison operations (Use AVX2 versions)
-//-----------------------------------------------------------------------
-// SIMD_IWRAPPER_2_CMP(cmpeq_epi8); // return a == b (int8)
-// SIMD_IWRAPPER_2_CMP(cmpeq_epi16); // return a == b (int16)
-// SIMD_IWRAPPER_2_CMP(cmpeq_epi32); // return a == b (int32)
-// SIMD_IWRAPPER_2_CMP(cmpeq_epi64); // return a == b (int64)
-// SIMD_IWRAPPER_2_CMP(cmpgt_epi8,); // return a > b (int8)
-// SIMD_IWRAPPER_2_CMP(cmpgt_epi16); // return a > b (int16)
-// SIMD_IWRAPPER_2_CMP(cmpgt_epi32); // return a > b (int32)
-// SIMD_IWRAPPER_2_CMP(cmpgt_epi64); // return a > b (int64)
-//
-// static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer a, Integer b) // return a < b (int32)
-//{
-// return cmpgt_epi32(b, a);
-//}
-
-//-----------------------------------------------------------------------
-// Blend / shuffle / permute operations
-//-----------------------------------------------------------------------
-// SIMD_IWRAPPER_2_8(packs_epi16);   // int16 --> int8
-// SIMD_IWRAPPER_2_16(packs_epi32);  // int32 --> int16
-// SIMD_IWRAPPER_2_8(packus_epi16);  // uint16 --> uint8
-// SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16
-//   (for each of the above, see the documentation for the matching _mm256_<op> and
-//   _mm512_<op> intrinsics: packs_epi16, packs_epi32, packus_epi16, packus_epi32,
-//   e.g. _mm256_packs_epi16 and _mm512_packs_epi16)
-// SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32);
-
-// static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz) // return a[swiz[i]] for
-// each 32-bit lane i (float)
-//{
-// return _mm256_permutevar8x32_ps(a, swiz);
-//}
-
-SIMD_IWRAPPER_1I_32(shuffle_epi32); // shuffle_epi32<ImmT>(a): forwards to _mm512_maskz_shuffle_epi32 on the 4 low 32-bit lanes
-// template<int ImmT>
-// static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b)
-//{
-// return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
-//}
-// SIMD_IWRAPPER_2(shuffle_epi8);
-SIMD_IWRAPPER_2_32(unpackhi_epi32); // forwards to _mm512_maskz_unpackhi_epi32 on the 4 low 32-bit lanes
-SIMD_IWRAPPER_2_32(unpacklo_epi32); // forwards to _mm512_maskz_unpacklo_epi32 on the 4 low 32-bit lanes
-
-// SIMD_IWRAPPER_2_16(unpackhi_epi16);
-// SIMD_IWRAPPER_2_64(unpackhi_epi64);
-// SIMD_IWRAPPER_2_8(unpackhi_epi8);
-// SIMD_IWRAPPER_2_16(unpacklo_epi16);
-// SIMD_IWRAPPER_2_64(unpacklo_epi64);
-// SIMD_IWRAPPER_2_8(unpacklo_epi8);
-
-//-----------------------------------------------------------------------
-// Load / store operations
-//-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL
- load_ps(float const* p) // return *p (loads 4 floats; implemented with the masked unaligned load, mask 0xf)
-{
- return __conv(_mm512_maskz_loadu_ps(__mmask16(0xf), p));
-}
-
-static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p (4 x 32-bit elements via the masked unaligned load, mask 0xf)
-{
- return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xf), p));
-}
-
-static SIMDINLINE Float SIMDCALL
- loadu_ps(float const* p) // return *p (unaligned memory allowed; same intrinsic call as load_ps here)
-{
- return __conv(_mm512_maskz_loadu_ps(__mmask16(0xf), p));
-}
-
-static SIMDINLINE Integer SIMDCALL
- loadu_si(Integer const* p) // return *p (unaligned memory allowed; same intrinsic call as load_si here)
-{
- return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xf), p));
-}
-
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- i32gather_ps(float const* p, Integer idx) // per lane: return *(float*)(((int8*)p) + (idx * ScaleT)); only the 4 low lanes (mask 0xf) are gathered
-{
- return __conv(_mm512_mask_i32gather_ps(
- _mm512_setzero_ps(), __mmask16(0xf), __conv(idx), p, static_cast<int>(ScaleT)));
-}
-
-// per lane: (mask & (1 << 31)) ? gather from ((int8*)p) + (idx * ScaleT) : keep the lane of old
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
-{
- const __m512i signBit = _mm512_set1_epi32(0x80000000);
- const __m512i maskBits = _mm512_castps_si512(__conv(mask));
- // Restrict to the 4 low lanes (0xf), then keep those whose sign bit is set in mask.
- const __mmask16 active = _mm512_mask_test_epi32_mask(__mmask16(0xf), maskBits, signBit);
- return __conv(_mm512_mask_i32gather_ps(__conv(old), active, __conv(idx), p, static_cast<int>(ScaleT)));
-}
-
-// static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
-// {
-// __mmask64 m = 0xffffull;
-// return static_cast<uint32_t>(
-// _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
-// }
-
-static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer mask, Float src) // per lane i < 4: if the sign bit of mask[i] is set, p[i] = src[i]
-{
- const __m512i signBit = _mm512_set1_epi32(0x80000000);
- const __mmask16 active = _mm512_mask_test_epi32_mask(__mmask16(0xf), __conv(mask), signBit);
- _mm512_mask_storeu_ps(p, active, __conv(src));
-}
-
-static SIMDINLINE void SIMDCALL
- store_ps(float* p, Float a) // *p = a (writes 4 contiguous floats via the masked unaligned store, mask 0xf)
-{
- _mm512_mask_storeu_ps(p, __mmask16(0xf), __conv(a));
-}
-
-static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer a) // *p = a (writes 4 x 32-bit elements via the masked unaligned store, mask 0xf)
-{
- _mm512_mask_storeu_epi32(p, __mmask16(0xf), __conv(a));
-}
-
-static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask) // lane i (i < 4) is all-ones (-1) when bit i of mask is set, zero otherwise
-{
- return castsi_ps(__conv(_mm512_maskz_set1_epi32(__mmask16(mask & 0xf), -1)));
-}
-
-//=======================================================================
-// Legacy interface (available only in SIMD256 width)
-//=======================================================================
-
-#undef SIMD_WRAPPER_1_ // this list also names macros this file never defines (e.g. SIMD_DWRAPPER_1_); #undef of an undefined name is a no-op
-#undef SIMD_WRAPPER_1
-#undef SIMD_WRAPPER_1I_
-#undef SIMD_WRAPPER_1I
-#undef SIMD_WRAPPER_2_
-#undef SIMD_WRAPPER_2
-#undef SIMD_WRAPPER_2I
-#undef SIMD_WRAPPER_3_
-#undef SIMD_WRAPPER_3
-#undef SIMD_DWRAPPER_1_
-#undef SIMD_DWRAPPER_1
-#undef SIMD_DWRAPPER_1I_
-#undef SIMD_DWRAPPER_1I
-#undef SIMD_DWRAPPER_2_
-#undef SIMD_DWRAPPER_2
-#undef SIMD_DWRAPPER_2I
-#undef SIMD_IWRAPPER_1_
-#undef SIMD_IWRAPPER_1_8
-#undef SIMD_IWRAPPER_1_16
-#undef SIMD_IWRAPPER_1_32
-#undef SIMD_IWRAPPER_1_64
-#undef SIMD_IWRAPPER_1I_
-#undef SIMD_IWRAPPER_1I_8
-#undef SIMD_IWRAPPER_1I_16
-#undef SIMD_IWRAPPER_1I_32
-#undef SIMD_IWRAPPER_1I_64
-#undef SIMD_IWRAPPER_2_
-#undef SIMD_IWRAPPER_2_8
-#undef SIMD_IWRAPPER_2_16
-#undef SIMD_IWRAPPER_2_32
-#undef SIMD_IWRAPPER_2_64
-#undef SIMD_IWRAPPER_2I
-//#undef SIMD_IWRAPPER_2I_8
-//#undef SIMD_IWRAPPER_2I_16
-//#undef SIMD_IWRAPPER_2I_32
-//#undef SIMD_IWRAPPER_2I_64
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl
deleted file mode 100644
index 16e59c4decb..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl
+++ /dev/null
@@ -1,196 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX512_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-//============================================================================
-// SIMD128 AVX (512) implementation
-//
-// Since this implementation inherits from the AVX (2) implementation,
-// the only operations below are ones that replace AVX (2) operations.
-// These use native AVX512 instructions with masking to enable a larger
-// register set.
-//============================================================================
-
-#define SIMD_WRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
- }
-#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xf)) // unary Float op; mask 0xf selects the 4 low 32-bit lanes
-
-#define SIMD_WRAPPER_1I_(op, intrin, mask) \
- template <int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
- }
-#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xf)) // unary Float op with a compile-time immediate ImmT
-
-#define SIMD_WRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
- }
-#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xf)) // binary Float op on the 4 low lanes
-
-#define SIMD_WRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- { \
- return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
- } // binary Float op with immediate; mask fixed to 0xf
-
-#define SIMD_WRAPPER_3_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c))); \
- }
-#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xf)) // ternary Float op on the 4 low lanes
-
-#define SIMD_DWRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Double SIMDCALL op(Double a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
- }
-#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0x3)) // unary Double op; mask 0x3 selects the 2 low 64-bit lanes
-
-#define SIMD_DWRAPPER_1I_(op, intrin, mask) \
- template <int ImmT> \
- static SIMDINLINE Double SIMDCALL op(Double a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
- }
-#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0x3)) // unary Double op with immediate
-
-#define SIMD_DWRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
- }
-#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0x3)) // binary Double op on the 2 low lanes
-
-#define SIMD_DWRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- { \
- return __conv(_mm512_maskz_##op(0x3, __conv(a), __conv(b), ImmT)); \
- } // binary Double op with immediate; mask fixed to 0x3
-
-#define SIMD_IWRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
- }
-#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffull)) // 16 x 8-bit lanes
-#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xff)) // 8 x 16-bit lanes
-#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0x3)) // 2 x 64-bit lanes
-
-#define SIMD_IWRAPPER_1I_(op, intrin, mask) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
- }
-#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffull)) // 16 x 8-bit lanes, with immediate
-#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xff)) // 8 x 16-bit lanes, with immediate
-#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0x3)) // 2 x 64-bit lanes, with immediate
-
-#define SIMD_IWRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
- }
-#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffull)) // binary op on 16 x 8-bit lanes
-#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xff)) // binary op on 8 x 16-bit lanes
-#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0x3)) // binary op on 2 x 64-bit lanes
-
-#define SIMD_IWRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
- } // binary Integer op with immediate; mask fixed to 0xf
-
-SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8)
-SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8, saturating)
-SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64)
-SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8, saturating)
-SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and
- // _mm512_packs_epi16
-SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and
- // _mm512_packs_epi32
-SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and
- // _mm512_packus_epi16
-SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and
- // _mm512_packus_epi32
-SIMD_IWRAPPER_2_16(unpackhi_epi16); // forwards to _mm512_maskz_unpackhi_epi16 (8 x 16-bit lanes)
-SIMD_IWRAPPER_2_64(unpackhi_epi64); // forwards to _mm512_maskz_unpackhi_epi64 (2 x 64-bit lanes)
-SIMD_IWRAPPER_2_8(unpackhi_epi8); // forwards to _mm512_maskz_unpackhi_epi8 (16 x 8-bit lanes)
-SIMD_IWRAPPER_2_16(unpacklo_epi16); // forwards to _mm512_maskz_unpacklo_epi16 (8 x 16-bit lanes)
-SIMD_IWRAPPER_2_64(unpacklo_epi64); // forwards to _mm512_maskz_unpacklo_epi64 (2 x 64-bit lanes)
-SIMD_IWRAPPER_2_8(unpacklo_epi8); // forwards to _mm512_maskz_unpacklo_epi8 (16 x 8-bit lanes)
-
-static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a) // bit i of the result is set when bit 7 of byte i of a is set (i < 16)
-{
- const __mmask64 signs = _mm512_mask_test_epi8_mask(__mmask64(0xffffull), __conv(a), _mm512_set1_epi8(0x80));
- return static_cast<uint32_t>(signs);
-}
-
-#undef SIMD_WRAPPER_1_ // this list also names macros this file never defines (e.g. SIMD_IWRAPPER_1_32); #undef of an undefined name is a no-op
-#undef SIMD_WRAPPER_1
-#undef SIMD_WRAPPER_1I_
-#undef SIMD_WRAPPER_1I
-#undef SIMD_WRAPPER_2_
-#undef SIMD_WRAPPER_2
-#undef SIMD_WRAPPER_2I
-#undef SIMD_WRAPPER_3_
-#undef SIMD_WRAPPER_3
-#undef SIMD_DWRAPPER_1_
-#undef SIMD_DWRAPPER_1
-#undef SIMD_DWRAPPER_1I_
-#undef SIMD_DWRAPPER_1I
-#undef SIMD_DWRAPPER_2_
-#undef SIMD_DWRAPPER_2
-#undef SIMD_DWRAPPER_2I
-#undef SIMD_IWRAPPER_1_
-#undef SIMD_IWRAPPER_1_8
-#undef SIMD_IWRAPPER_1_16
-#undef SIMD_IWRAPPER_1_32
-#undef SIMD_IWRAPPER_1_64
-#undef SIMD_IWRAPPER_1I_
-#undef SIMD_IWRAPPER_1I_8
-#undef SIMD_IWRAPPER_1I_16
-#undef SIMD_IWRAPPER_1I_32
-#undef SIMD_IWRAPPER_1I_64
-#undef SIMD_IWRAPPER_2_
-#undef SIMD_IWRAPPER_2_8
-#undef SIMD_IWRAPPER_2_16
-#undef SIMD_IWRAPPER_2_32
-#undef SIMD_IWRAPPER_2_64
-#undef SIMD_IWRAPPER_2I
-//#undef SIMD_IWRAPPER_2I_8
-//#undef SIMD_IWRAPPER_2I_16
-//#undef SIMD_IWRAPPER_2I_32
-//#undef SIMD_IWRAPPER_2I_64
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl
deleted file mode 100644
index 1b6592e2003..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl
+++ /dev/null
@@ -1,34 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX512_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-//============================================================================
-// SIMD128 AVX (512) implementation for Knights Family
-//
-// Since this implementation inherits from the AVX512Base implementation,
-// the only operations below are ones that replace AVX512F / AVX512CD operations.
-// These use native AVX512 instructions with masking to enable a larger
-// register set.
-//============================================================================
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl
deleted file mode 100644
index d0c3ecd4cf3..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl
+++ /dev/null
@@ -1,826 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-using SIMD128T = SIMD128Impl::AVXImpl;
-
-//============================================================================
-// SIMD256 AVX (1) implementation
-//============================================================================
-
-#define SIMD_WRAPPER_1(op) \
- static SIMDINLINE Float SIMDCALL op(Float const& a) { return _mm256_##op(a); }
-
-#define SIMD_WRAPPER_2(op) \
- static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b) \
- { \
- return _mm256_##op(a, b); \
- }
-
-#define SIMD_DWRAPPER_2(op) \
- static SIMDINLINE Double SIMDCALL op(Double const& a, Double const& b) \
- { \
- return _mm256_##op(a, b); \
- }
-
-#define SIMD_WRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b) \
- { \
- return _mm256_##op(a, b, ImmT); \
- }
-
-#define SIMD_DWRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Double SIMDCALL op(Double const& a, Double const& b) \
- { \
- return _mm256_##op(a, b, ImmT); \
- }
-
-#define SIMD_WRAPPER_3(op) \
- static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b, Float const& c) \
- { \
- return _mm256_##op(a, b, c); \
- }
-
-#define SIMD_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a) { return _mm256_##op(a); }
-
-#define SIMD_IWRAPPER_2(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
- { \
- return _mm256_##op(a, b); \
- }
-
-#define SIMD_IFWRAPPER_2(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
- { \
- return castps_si(intrin(castsi_ps(a), castsi_ps(b))); \
- }
-
-#define SIMD_IFWRAPPER_2I(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
- { \
- return castps_si(intrin(castsi_ps(a), castsi_ps(b), ImmT)); \
- }
-
-#define SIMD_IWRAPPER_2I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
- { \
- return _mm256_##intrin(a, b, ImmT); \
- }
-#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
-
-#define SIMD_IWRAPPER_3(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b, Integer const& c) \
- { \
- return _mm256_##op(a, b, c); \
- }
-
-// emulated integer simd
-#define SIMD_EMU_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
- { \
- return Integer{ \
- SIMD128T::op(a.v4[0]), \
- SIMD128T::op(a.v4[1]), \
- }; \
- }
-#define SIMD_EMU_IWRAPPER_1L(op, shift) \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
- { \
- return Integer{ \
- SIMD128T::op(a.v4[0]), \
- SIMD128T::op(SIMD128T::template srli_si<shift>(a.v4[0])), \
- }; \
- } \
- static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer const& a) \
- { \
- return Integer{ \
- SIMD128T::op(a), \
- SIMD128T::op(SIMD128T::template srli_si<shift>(a)), \
- }; \
- }
-
-#define SIMD_EMU_IWRAPPER_1I(op) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
- { \
- return Integer{ \
- SIMD128T::template op<ImmT>(a.v4[0]), \
- SIMD128T::template op<ImmT>(a.v4[1]), \
- }; \
- }
-
-#define SIMD_EMU_IWRAPPER_2(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
- { \
- return Integer{ \
- SIMD128T::op(a.v4[0], b.v4[0]), \
- SIMD128T::op(a.v4[1], b.v4[1]), \
- }; \
- }
-
-#define SIMD_EMU_IWRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
- { \
- return Integer{ \
- SIMD128T::template op<ImmT>(a.v4[0], b.v[0]), \
- SIMD128T::template op<ImmT>(a.v4[1], b.v[1]), \
- }; \
- }
-
-//-----------------------------------------------------------------------
-// Single precision floating point arithmetic operations
-//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps); // return a + b
-SIMD_WRAPPER_2(div_ps); // return a / b
-
-static SIMDINLINE Float SIMDCALL fmadd_ps(Float const& a,
- Float const& b,
- Float const& c) // return (a * b) + c
-{
- return add_ps(mul_ps(a, b), c);
-}
-
-static SIMDINLINE Float SIMDCALL fmsub_ps(Float const& a,
- Float const& b,
- Float const& c) // return (a * b) - c
-{
- return sub_ps(mul_ps(a, b), c);
-}
-
-SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps); // return a * b
-SIMD_WRAPPER_1(rcp_ps); // return 1.0f / a
-SIMD_WRAPPER_1(rsqrt_ps); // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps); // return a - b
-
-template <RoundMode RMT>
-static SIMDINLINE Float SIMDCALL round_ps(Float const& a)
-{
- return _mm256_round_ps(a, static_cast<int>(RMT));
-}
-
-static SIMDINLINE Float SIMDCALL ceil_ps(Float const& a)
-{
- return round_ps<RoundMode::CEIL_NOEXC>(a);
-}
-static SIMDINLINE Float SIMDCALL floor_ps(Float const& a)
-{
- return round_ps<RoundMode::FLOOR_NOEXC>(a);
-}
-
-//-----------------------------------------------------------------------
-// Integer (various width) arithmetic operations
-//-----------------------------------------------------------------------
-SIMD_EMU_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
-SIMD_EMU_IWRAPPER_2(add_epi32); // return a + b (int32)
-SIMD_EMU_IWRAPPER_2(add_epi8); // return a + b (int8)
-SIMD_EMU_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
-SIMD_EMU_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
-SIMD_EMU_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
-SIMD_EMU_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
-SIMD_EMU_IWRAPPER_2(min_epu32); // return (a < b) ? a : b (uint32)
-SIMD_EMU_IWRAPPER_2(mul_epi32); // return a * b (int32)
-
-// return (a * b) & 0xFFFFFFFF
-//
-// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
-// and store the low 32 bits of the intermediate integers in dst.
-SIMD_EMU_IWRAPPER_2(mullo_epi32);
-SIMD_EMU_IWRAPPER_2(sub_epi32); // return a - b (int32)
-SIMD_EMU_IWRAPPER_2(sub_epi64); // return a - b (int64)
-SIMD_EMU_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
-
-//-----------------------------------------------------------------------
-// Logical operations
-//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int)
-SIMD_IFWRAPPER_2(and_si, _mm256_and_ps); // return a & b (int)
-SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int)
-SIMD_IFWRAPPER_2(andnot_si, _mm256_andnot_ps); // return (~a) & b (int)
-SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int)
-SIMD_IFWRAPPER_2(or_si, _mm256_or_ps); // return a | b (int)
-SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int)
-SIMD_IFWRAPPER_2(xor_si, _mm256_xor_ps); // return a ^ b (int)
-
-//-----------------------------------------------------------------------
-// Shift operations
-//-----------------------------------------------------------------------
-SIMD_EMU_IWRAPPER_1I(slli_epi32); // return a << ImmT
-
-static SIMDINLINE Integer SIMDCALL sllv_epi32(Integer const& vA,
- Integer const& vCount) // return a << b (uint32)
-{
- int32_t aHi, aLow, countHi, countLow;
- __m128i vAHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 1));
- __m128i vALow = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 0));
- __m128i vCountHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 1));
- __m128i vCountLow = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 0));
-
- aHi = _mm_extract_epi32(vAHi, 0);
- countHi = _mm_extract_epi32(vCountHi, 0);
- aHi <<= countHi;
- vAHi = _mm_insert_epi32(vAHi, aHi, 0);
-
- aLow = _mm_extract_epi32(vALow, 0);
- countLow = _mm_extract_epi32(vCountLow, 0);
- aLow <<= countLow;
- vALow = _mm_insert_epi32(vALow, aLow, 0);
-
- aHi = _mm_extract_epi32(vAHi, 1);
- countHi = _mm_extract_epi32(vCountHi, 1);
- aHi <<= countHi;
- vAHi = _mm_insert_epi32(vAHi, aHi, 1);
-
- aLow = _mm_extract_epi32(vALow, 1);
- countLow = _mm_extract_epi32(vCountLow, 1);
- aLow <<= countLow;
- vALow = _mm_insert_epi32(vALow, aLow, 1);
-
- aHi = _mm_extract_epi32(vAHi, 2);
- countHi = _mm_extract_epi32(vCountHi, 2);
- aHi <<= countHi;
- vAHi = _mm_insert_epi32(vAHi, aHi, 2);
-
- aLow = _mm_extract_epi32(vALow, 2);
- countLow = _mm_extract_epi32(vCountLow, 2);
- aLow <<= countLow;
- vALow = _mm_insert_epi32(vALow, aLow, 2);
-
- aHi = _mm_extract_epi32(vAHi, 3);
- countHi = _mm_extract_epi32(vCountHi, 3);
- aHi <<= countHi;
- vAHi = _mm_insert_epi32(vAHi, aHi, 3);
-
- aLow = _mm_extract_epi32(vALow, 3);
- countLow = _mm_extract_epi32(vCountLow, 3);
- aLow <<= countLow;
- vALow = _mm_insert_epi32(vALow, aLow, 3);
-
- __m256i ret = _mm256_set1_epi32(0);
- ret = _mm256_insertf128_si256(ret, vAHi, 1);
- ret = _mm256_insertf128_si256(ret, vALow, 0);
- return ret;
-}
-
-SIMD_EMU_IWRAPPER_1I(srai_epi32); // return a >> ImmT (int32)
-SIMD_EMU_IWRAPPER_1I(srli_epi32); // return a >> ImmT (uint32)
-SIMD_EMU_IWRAPPER_1I(srli_si); // return a >> (ImmT*8) (uint)
-
-template <int ImmT> // same as srli_si, but with Float cast to int
-static SIMDINLINE Float SIMDCALL srlisi_ps(Float const& a)
-{
- return castsi_ps(srli_si<ImmT>(castps_si(a)));
-}
-
-static SIMDINLINE Integer SIMDCALL srlv_epi32(Integer const& vA,
- Integer const& vCount) // return a >> b (uint32)
-{
- int32_t aHi, aLow, countHi, countLow;
- __m128i vAHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 1));
- __m128i vALow = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 0));
- __m128i vCountHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 1));
- __m128i vCountLow = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 0));
-
- aHi = _mm_extract_epi32(vAHi, 0);
- countHi = _mm_extract_epi32(vCountHi, 0);
- aHi >>= countHi;
- vAHi = _mm_insert_epi32(vAHi, aHi, 0);
-
- aLow = _mm_extract_epi32(vALow, 0);
- countLow = _mm_extract_epi32(vCountLow, 0);
- aLow >>= countLow;
- vALow = _mm_insert_epi32(vALow, aLow, 0);
-
- aHi = _mm_extract_epi32(vAHi, 1);
- countHi = _mm_extract_epi32(vCountHi, 1);
- aHi >>= countHi;
- vAHi = _mm_insert_epi32(vAHi, aHi, 1);
-
- aLow = _mm_extract_epi32(vALow, 1);
- countLow = _mm_extract_epi32(vCountLow, 1);
- aLow >>= countLow;
- vALow = _mm_insert_epi32(vALow, aLow, 1);
-
- aHi = _mm_extract_epi32(vAHi, 2);
- countHi = _mm_extract_epi32(vCountHi, 2);
- aHi >>= countHi;
- vAHi = _mm_insert_epi32(vAHi, aHi, 2);
-
- aLow = _mm_extract_epi32(vALow, 2);
- countLow = _mm_extract_epi32(vCountLow, 2);
- aLow >>= countLow;
- vALow = _mm_insert_epi32(vALow, aLow, 2);
-
- aHi = _mm_extract_epi32(vAHi, 3);
- countHi = _mm_extract_epi32(vCountHi, 3);
- aHi >>= countHi;
- vAHi = _mm_insert_epi32(vAHi, aHi, 3);
-
- aLow = _mm_extract_epi32(vALow, 3);
- countLow = _mm_extract_epi32(vCountLow, 3);
- aLow >>= countLow;
- vALow = _mm_insert_epi32(vALow, aLow, 3);
-
- __m256i ret = _mm256_set1_epi32(0);
- ret = _mm256_insertf128_si256(ret, vAHi, 1);
- ret = _mm256_insertf128_si256(ret, vALow, 0);
- return ret;
-}
-
-//-----------------------------------------------------------------------
-// Conversion operations
-//-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL castpd_ps(Double const& a) // return *(Float*)(&a)
-{
- return _mm256_castpd_ps(a);
-}
-
-static SIMDINLINE Integer SIMDCALL castps_si(Float const& a) // return *(Integer*)(&a)
-{
- return _mm256_castps_si256(a);
-}
-
-static SIMDINLINE Double SIMDCALL castsi_pd(Integer const& a) // return *(Double*)(&a)
-{
- return _mm256_castsi256_pd(a);
-}
-
-static SIMDINLINE Double SIMDCALL castps_pd(Float const& a) // return *(Double*)(&a)
-{
- return _mm256_castps_pd(a);
-}
-
-static SIMDINLINE Integer SIMDCALL castpd_si(Double const& a) // return *(Integer*)(&a)
-{
- return _mm256_castpd_si256(a);
-}
-
-static SIMDINLINE Float SIMDCALL castsi_ps(Integer const& a) // return *(Float*)(&a)
-{
- return _mm256_castsi256_ps(a);
-}
-
-static SIMDINLINE Float SIMDCALL
- cvtepi32_ps(Integer const& a) // return (float)a (int32 --> float)
-{
- return _mm256_cvtepi32_ps(a);
-}
-
-SIMD_EMU_IWRAPPER_1L(cvtepu8_epi16, 8); // return (int16)a (uint8 --> int16)
-SIMD_EMU_IWRAPPER_1L(cvtepu8_epi32, 4); // return (int32)a (uint8 --> int32)
-SIMD_EMU_IWRAPPER_1L(cvtepu16_epi32, 8); // return (int32)a (uint16 --> int32)
-SIMD_EMU_IWRAPPER_1L(cvtepu16_epi64, 4); // return (int64)a (uint16 --> int64)
-SIMD_EMU_IWRAPPER_1L(cvtepu32_epi64, 8); // return (int64)a (uint32 --> int64)
-
-static SIMDINLINE Integer SIMDCALL
- cvtps_epi32(Float const& a) // return (int32)a (float --> int32)
-{
- return _mm256_cvtps_epi32(a);
-}
-
-static SIMDINLINE Integer SIMDCALL
- cvttps_epi32(Float const& a) // return (int32)a (rnd_to_zero(float) --> int32)
-{
- return _mm256_cvttps_epi32(a);
-}
-
-//-----------------------------------------------------------------------
-// Comparison operations
-//-----------------------------------------------------------------------
-template <CompareType CmpTypeT>
-static SIMDINLINE Float SIMDCALL cmp_ps(Float const& a, Float const& b) // return a (CmpTypeT) b
-{
- return _mm256_cmp_ps(a, b, static_cast<const int>(CmpTypeT));
-}
-static SIMDINLINE Float SIMDCALL cmplt_ps(Float const& a, Float const& b)
-{
- return cmp_ps<CompareType::LT_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpgt_ps(Float const& a, Float const& b)
-{
- return cmp_ps<CompareType::GT_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpneq_ps(Float const& a, Float const& b)
-{
- return cmp_ps<CompareType::NEQ_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpeq_ps(Float const& a, Float const& b)
-{
- return cmp_ps<CompareType::EQ_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpge_ps(Float const& a, Float const& b)
-{
- return cmp_ps<CompareType::GE_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmple_ps(Float const& a, Float const& b)
-{
- return cmp_ps<CompareType::LE_OQ>(a, b);
-}
-
-SIMD_EMU_IWRAPPER_2(cmpeq_epi8); // return a == b (int8)
-SIMD_EMU_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
-SIMD_EMU_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
-SIMD_EMU_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
-SIMD_EMU_IWRAPPER_2(cmpgt_epi8); // return a > b (int8)
-SIMD_EMU_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
-SIMD_EMU_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
-SIMD_EMU_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
-SIMD_EMU_IWRAPPER_2(cmplt_epi32); // return a < b (int32)
-
-static SIMDINLINE bool SIMDCALL
- testz_ps(Float const& a, Float const& b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
-{
- return 0 != _mm256_testz_ps(a, b);
-}
-
-static SIMDINLINE bool SIMDCALL
- testz_si(Integer const& a, Integer const& b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
-{
- return 0 != _mm256_testz_si256(a, b);
-}
-
-//-----------------------------------------------------------------------
-// Blend / shuffle / permute operations
-//-----------------------------------------------------------------------
-SIMD_WRAPPER_2I(blend_ps); // return ImmT ? b : a (float)
-SIMD_IFWRAPPER_2I(blend_epi32, _mm256_blend_ps); // return ImmT ? b : a (int32)
-SIMD_WRAPPER_3(blendv_ps); // return mask ? b : a (float)
-
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const& a,
- Integer const& b,
- Float const& mask) // return mask ? b : a (int)
-{
- return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), mask));
-}
-
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const& a,
- Integer const& b,
- Integer const& mask) // return mask ? b : a (int)
-{
- return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), castsi_ps(mask)));
-}
-
-static SIMDINLINE Float SIMDCALL
- broadcast_ss(float const* p) // return *p (all elements in vector get same value)
-{
- return _mm256_broadcast_ss(p);
-}
-
-SIMD_EMU_IWRAPPER_2(packs_epi16); // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-SIMD_EMU_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-SIMD_EMU_IWRAPPER_2(
- packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-SIMD_EMU_IWRAPPER_2(
- packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
-
-template <int ImmT>
-static SIMDINLINE Float SIMDCALL permute_ps(Float const& a)
-{
- return _mm256_permute_ps(a, ImmT);
-}
-
-static SIMDINLINE Integer SIMDCALL permute_epi32(
- Integer const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (int32)
-{
- Integer result;
-
- // Ugly slow implementation
- uint32_t const* pA = reinterpret_cast<uint32_t const*>(&a);
- uint32_t const* pSwiz = reinterpret_cast<uint32_t const*>(&swiz);
- uint32_t* pResult = reinterpret_cast<uint32_t*>(&result);
-
- for (uint32_t i = 0; i < SIMD_WIDTH; ++i)
- {
- pResult[i] = pA[0xF & pSwiz[i]];
- }
-
- return result;
-}
-
-static SIMDINLINE Float SIMDCALL
- permute_ps(Float const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (float)
-{
- Float result;
-
- // Ugly slow implementation
- float const* pA = reinterpret_cast<float const*>(&a);
- uint32_t const* pSwiz = reinterpret_cast<uint32_t const*>(&swiz);
- float* pResult = reinterpret_cast<float*>(&result);
-
- for (uint32_t i = 0; i < SIMD_WIDTH; ++i)
- {
- pResult[i] = pA[0xF & pSwiz[i]];
- }
-
- return result;
-}
-
-SIMD_WRAPPER_2I(permute2f128_ps);
-SIMD_DWRAPPER_2I(permute2f128_pd);
-SIMD_IWRAPPER_2I_(permute2f128_si, permute2f128_si256);
-
-SIMD_EMU_IWRAPPER_1I(shuffle_epi32);
-
-template <int ImmT>
-static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer const& a, Integer const& b)
-{
- return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
-}
-SIMD_EMU_IWRAPPER_2(shuffle_epi8);
-SIMD_DWRAPPER_2I(shuffle_pd);
-SIMD_WRAPPER_2I(shuffle_ps);
-SIMD_EMU_IWRAPPER_2(unpackhi_epi16);
-SIMD_IFWRAPPER_2(unpackhi_epi32, _mm256_unpackhi_ps);
-SIMD_EMU_IWRAPPER_2(unpackhi_epi64);
-SIMD_EMU_IWRAPPER_2(unpackhi_epi8);
-SIMD_DWRAPPER_2(unpackhi_pd);
-SIMD_WRAPPER_2(unpackhi_ps);
-SIMD_EMU_IWRAPPER_2(unpacklo_epi16);
-SIMD_IFWRAPPER_2(unpacklo_epi32, _mm256_unpacklo_ps);
-SIMD_EMU_IWRAPPER_2(unpacklo_epi64);
-SIMD_EMU_IWRAPPER_2(unpacklo_epi8);
-SIMD_DWRAPPER_2(unpacklo_pd);
-SIMD_WRAPPER_2(unpacklo_ps);
-
-//-----------------------------------------------------------------------
-// Load / store operations
-//-----------------------------------------------------------------------
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- i32gather_ps(float const* p, Integer const& idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
-{
- uint32_t* pOffsets = (uint32_t*)&idx;
- Float vResult;
- float* pResult = (float*)&vResult;
- for (uint32_t i = 0; i < SIMD_WIDTH; ++i)
- {
- uint32_t offset = pOffsets[i];
- offset = offset * static_cast<uint32_t>(ScaleT);
- pResult[i] = *(float const*)(((uint8_t const*)p + offset));
- }
-
- return vResult;
-}
-
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
-sw_i32gather_ps(float const* p, Integer const& idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
-{
- return i32gather_ps<ScaleT>(p, idx);
-}
-
-static SIMDINLINE Float SIMDCALL
- load1_ps(float const* p) // return *p (broadcast 1 value to all elements)
-{
- return broadcast_ss(p);
-}
-
-static SIMDINLINE Float SIMDCALL
- load_ps(float const* p) // return *p (loads SIMD width elements from memory)
-{
- return _mm256_load_ps(p);
-}
-
-static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
-{
- return _mm256_load_si256(&p->v);
-}
-
-static SIMDINLINE Float SIMDCALL
- loadu_ps(float const* p) // return *p (same as load_ps but allows for unaligned mem)
-{
- return _mm256_loadu_ps(p);
-}
-
-static SIMDINLINE Integer SIMDCALL
- loadu_si(Integer const* p) // return *p (same as load_si but allows for unaligned mem)
-{
- return _mm256_lddqu_si256(&p->v);
-}
-
-// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- mask_i32gather_ps(Float const& old, float const* p, Integer const& idx, Float const& mask)
-{
- uint32_t* pOffsets = (uint32_t*)&idx;
- Float vResult = old;
- float* pResult = (float*)&vResult;
- unsigned long index = 0;
- uint32_t umask = movemask_ps(mask);
- while (_BitScanForward(&index, umask))
- {
- umask &= ~(1 << index);
- uint32_t offset = pOffsets[index];
- offset = offset * static_cast<uint32_t>(ScaleT);
- pResult[index] = *(float const*)(((uint8_t const*)p + offset));
- }
-
- return vResult;
-}
-
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
-sw_mask_i32gather_ps(Float const& old, float const* p, Integer const& idx, Float const& mask)
-{
- return mask_i32gather_ps<ScaleT>(old, p, idx, mask);
-}
-
-static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer const& mask, Float const& src)
-{
- _mm256_maskstore_ps(p, mask, src);
-}
-
-static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer const& a)
-{
- return SIMD128T::movemask_epi8(a.v4[0]) | (SIMD128T::movemask_epi8(a.v4[1]) << 16);
-}
-
-static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double const& a)
-{
- return static_cast<uint32_t>(_mm256_movemask_pd(a));
-}
-static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float const& a)
-{
- return static_cast<uint32_t>(_mm256_movemask_ps(a));
-}
-
-static SIMDINLINE Integer SIMDCALL set1_epi32(int i) // return i (all elements are same value)
-{
- return _mm256_set1_epi32(i);
-}
-
-static SIMDINLINE Integer SIMDCALL set1_epi8(char i) // return i (all elements are same value)
-{
- return _mm256_set1_epi8(i);
-}
-
-static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
-{
- return _mm256_set1_ps(f);
-}
-
-static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
-{
- return _mm256_setzero_ps();
-}
-
-static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
-{
- return _mm256_setzero_si256();
-}
-
-static SIMDINLINE void SIMDCALL
- store_ps(float* p, Float const& a) // *p = a (stores all elements contiguously in memory)
-{
- _mm256_store_ps(p, a);
-}
-
-static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer const& a) // *p = a
-{
- _mm256_store_si256(&p->v, a);
-}
-
-static SIMDINLINE void SIMDCALL
- stream_ps(float* p, Float const& a) // *p = a (same as store_ps, but doesn't keep memory in cache)
-{
- _mm256_stream_ps(p, a);
-}
-
-//=======================================================================
-// Legacy interface (available only in SIMD256 width)
-//=======================================================================
-
-static SIMDINLINE Float SIMDCALL broadcast_ps(SIMD128Impl::Float const* p)
-{
- return _mm256_broadcast_ps(&p->v);
-}
-
-template <int ImmT>
-static SIMDINLINE SIMD128Impl::Double SIMDCALL extractf128_pd(Double const& a)
-{
- return _mm256_extractf128_pd(a, ImmT);
-}
-
-template <int ImmT>
-static SIMDINLINE SIMD128Impl::Float SIMDCALL extractf128_ps(Float const& a)
-{
- return _mm256_extractf128_ps(a, ImmT);
-}
-
-template <int ImmT>
-static SIMDINLINE SIMD128Impl::Integer SIMDCALL extractf128_si(Integer const& a)
-{
- return _mm256_extractf128_si256(a, ImmT);
-}
-
-template <int ImmT>
-static SIMDINLINE Double SIMDCALL insertf128_pd(Double const& a, SIMD128Impl::Double const& b)
-{
- return _mm256_insertf128_pd(a, b, ImmT);
-}
-
-template <int ImmT>
-static SIMDINLINE Float SIMDCALL insertf128_ps(Float const& a, SIMD128Impl::Float const& b)
-{
- return _mm256_insertf128_ps(a, b, ImmT);
-}
-
-template <int ImmT>
-static SIMDINLINE Integer SIMDCALL insertf128_si(Integer const& a, SIMD128Impl::Integer const& b)
-{
- return _mm256_insertf128_si256(a, b, ImmT);
-}
-
-#ifndef _mm256_set_m128i
-#define _mm256_set_m128i(/* SIMD128Impl::Integer */ hi, /* SIMD128Impl::Integer */ lo) \
- _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 0x1)
-#endif
-
-#ifndef _mm256_loadu2_m128i
-#define _mm256_loadu2_m128i(/* SIMD128Impl::Integer const* */ hiaddr, \
- /* SIMD128Impl::Integer const* */ loaddr) \
- _mm256_set_m128i(_mm_loadu_si128(hiaddr), _mm_loadu_si128(loaddr))
-#endif
-
-static SIMDINLINE Integer SIMDCALL loadu2_si(SIMD128Impl::Integer const* phi,
- SIMD128Impl::Integer const* plo)
-{
- return _mm256_loadu2_m128i(&phi->v, &plo->v);
-}
-
-static SIMDINLINE Integer SIMDCALL
- set_epi32(int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
-{
- return _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0);
-}
-
-static SIMDINLINE Float SIMDCALL
- set_ps(float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
-{
- return _mm256_set_ps(i7, i6, i5, i4, i3, i2, i1, i0);
-}
-
-static SIMDINLINE void SIMDCALL storeu2_si(SIMD128Impl::Integer* phi,
- SIMD128Impl::Integer* plo,
- Integer const& src)
-{
- _mm256_storeu2_m128i(&phi->v, &plo->v, src);
-}
-
-static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
-{
- Integer vec = set1_epi32(mask);
- const Integer bit = set_epi32(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
- vec = and_si(vec, bit);
- vec = cmplt_epi32(setzero_si(), vec);
- return castsi_ps(vec);
-}
-
-#undef SIMD_WRAPPER_1
-#undef SIMD_WRAPPER_2
-#undef SIMD_DWRAPPER_2
-#undef SIMD_DWRAPPER_2I
-#undef SIMD_WRAPPER_2I
-#undef SIMD_WRAPPER_3
-#undef SIMD_IWRAPPER_1
-#undef SIMD_IWRAPPER_2
-#undef SIMD_IFWRAPPER_2
-#undef SIMD_IFWRAPPER_2I
-#undef SIMD_IWRAPPER_2I
-#undef SIMD_IWRAPPER_2I_
-#undef SIMD_IWRAPPER_2_
-#undef SIMD_IWRAPPER_3
-#undef SIMD_EMU_IWRAPPER_1
-#undef SIMD_EMU_IWRAPPER_1I
-#undef SIMD_EMU_IWRAPPER_2
-#undef SIMD_EMU_IWRAPPER_2I
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl
deleted file mode 100644
index 8fce96dcea4..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl
+++ /dev/null
@@ -1,255 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX2_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-//============================================================================
-// SIMD256 AVX (2) implementation
-//
-// Since this implementation inherits from the AVX (1) implementation,
-// the only operations below ones that replace AVX (1) operations.
-// Mostly these are integer operations that are no longer emulated with SSE
-//============================================================================
-
-#define SIMD_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a) { return _mm256_##op(a); }
-
-#define SIMD_IWRAPPER_1L(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
- { \
- return _mm256_##op(_mm256_castsi256_si128(a)); \
- }
-
-#define SIMD_IWRAPPER_1I(op) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
- { \
- return _mm256_##op(a, ImmT); \
- }
-
-#define SIMD_IWRAPPER_1I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
- { \
- return _mm256_##intrin(a, ImmT); \
- }
-
-#define SIMD_IWRAPPER_2_(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
- { \
- return _mm256_##intrin(a, b); \
- }
-
-#define SIMD_IWRAPPER_2(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
- { \
- return _mm256_##op(a, b); \
- }
-
-#define SIMD_IWRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
- { \
- return _mm256_##op(a, b, ImmT); \
- }
-
-#define SIMD_IWRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
- { \
- return _mm256_##op(a, b, ImmT); \
- }
-
-
-//-----------------------------------------------------------------------
-// Floating point arithmetic operations
-//-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL fmadd_ps(Float const& a,
- Float const& b,
- Float const& c) // return (a * b) + c
-{
- return _mm256_fmadd_ps(a, b, c);
-}
-
-//-----------------------------------------------------------------------
-// Integer (various width) arithmetic operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
-SIMD_IWRAPPER_2(add_epi32); // return a + b (int32)
-SIMD_IWRAPPER_2(add_epi8); // return a + b (int8)
-SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
-SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
-SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
-SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
-SIMD_IWRAPPER_2(min_epu32); // return (a < b) ? a : b (uint32)
-SIMD_IWRAPPER_2(mul_epi32); // return a * b (int32)
-
-// return (a * b) & 0xFFFFFFFF
-//
-// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
-// and store the low 32 bits of the intermediate integers in dst.
-SIMD_IWRAPPER_2(mullo_epi32);
-SIMD_IWRAPPER_2(sub_epi32); // return a - b (int32)
-SIMD_IWRAPPER_2(sub_epi64); // return a - b (int64)
-SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
-
-//-----------------------------------------------------------------------
-// Logical operations
-//-----------------------------------------------------------------------
-#if _MSC_VER >= 1920 // && _MSC_FULL_VER < [some_fixed_version]
-// Some versions of MSVC 2019 don't handle constant folding of and_si() correctly.
-// Using and_ps instead inhibits the compiler's constant folding and actually issues
-// the and intrinsic even though both inputs are constant values.
-#else
-// Use native integer and intrinsic
-SIMD_IWRAPPER_2_(and_si, and_si256); // return a & b (int)
-#endif
-SIMD_IWRAPPER_2_(andnot_si, andnot_si256); // return (~a) & b (int)
-SIMD_IWRAPPER_2_(or_si, or_si256); // return a | b (int)
-SIMD_IWRAPPER_2_(xor_si, xor_si256); // return a ^ b (int)
-
-//-----------------------------------------------------------------------
-// Shift operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I(slli_epi32); // return a << ImmT
-SIMD_IWRAPPER_2(sllv_epi32); // return a << b (uint32)
-SIMD_IWRAPPER_1I(srai_epi32); // return a >> ImmT (int32)
-SIMD_IWRAPPER_1I(srli_epi32); // return a >> ImmT (uint32)
-SIMD_IWRAPPER_2(srlv_epi32); // return a >> b (uint32)
-SIMD_IWRAPPER_1I_(srli_si, srli_si256); // return a >> (ImmT*8) (uint)
-
-template <int ImmT> // same as srli_si, but with Float cast to int
-static SIMDINLINE Float SIMDCALL srlisi_ps(Float const& a)
-{
- return castsi_ps(srli_si<ImmT>(castps_si(a)));
-}
-
-//-----------------------------------------------------------------------
-// Conversion operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1L(cvtepu8_epi16); // return (int16)a (uint8 --> int16)
-SIMD_IWRAPPER_1L(cvtepu8_epi32); // return (int32)a (uint8 --> int32)
-SIMD_IWRAPPER_1L(cvtepu16_epi32); // return (int32)a (uint16 --> int32)
-SIMD_IWRAPPER_1L(cvtepu16_epi64); // return (int64)a (uint16 --> int64)
-SIMD_IWRAPPER_1L(cvtepu32_epi64); // return (int64)a (uint32 --> int64)
-
-//-----------------------------------------------------------------------
-// Comparison operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_2(cmpeq_epi8); // return a == b (int8)
-SIMD_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
-SIMD_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
-SIMD_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
-SIMD_IWRAPPER_2(cmpgt_epi8); // return a > b (int8)
-SIMD_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
-SIMD_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
-SIMD_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
-
-static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer const& a,
- Integer const& b) // return a < b (int32)
-{
- return cmpgt_epi32(b, a);
-}
-
-//-----------------------------------------------------------------------
-// Blend / shuffle / permute operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_2I(blend_epi32); // return ImmT ? b : a (int32)
-SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
-
-template <int ImmT>
-static SIMDINLINE Float SIMDCALL permute_ps(Float const& a)
-{
- return _mm256_permute_ps(a, ImmT);
-}
-
-SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32);
-
-static SIMDINLINE Float SIMDCALL
- permute_ps(Float const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (float)
-{
- return _mm256_permutevar8x32_ps(a, swiz);
-}
-
-SIMD_IWRAPPER_1I(shuffle_epi32);
-template <int ImmT>
-static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer const& a, Integer const& b)
-{
- return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
-}
-SIMD_IWRAPPER_2(shuffle_epi8);
-SIMD_IWRAPPER_2(unpackhi_epi16);
-SIMD_IWRAPPER_2(unpackhi_epi32);
-SIMD_IWRAPPER_2(unpackhi_epi64);
-SIMD_IWRAPPER_2(unpackhi_epi8);
-SIMD_IWRAPPER_2(unpacklo_epi16);
-SIMD_IWRAPPER_2(unpacklo_epi32);
-SIMD_IWRAPPER_2(unpacklo_epi64);
-SIMD_IWRAPPER_2(unpacklo_epi8);
-
-//-----------------------------------------------------------------------
-// Load / store operations
-//-----------------------------------------------------------------------
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- i32gather_ps(float const* p, Integer const& idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
-{
- return _mm256_i32gather_ps(p, idx, static_cast<int>(ScaleT));
-}
-
-#if _MSC_VER == 1920 // && _MSC_FULL_VER < [some_fixed_version]
-// Don't use _mm256_mask_i32gather_ps(), the compiler doesn't preserve the mask register
-// correctly in early versions of MSVC 2019
-#else
-// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- mask_i32gather_ps(Float const& old, float const* p, Integer const& idx, Float const& mask)
-{
- // g++ in debug mode needs the explicit .v suffix instead of relying on operator __m256()
- // Only for this intrinsic - not sure why. :(
- return _mm256_mask_i32gather_ps(old.v, p, idx.v, mask.v, static_cast<int>(ScaleT));
-}
-#endif
-
-static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer const& a)
-{
- return static_cast<uint32_t>(_mm256_movemask_epi8(a));
-}
-
-//=======================================================================
-// Legacy interface (available only in SIMD256 width)
-//=======================================================================
-
-#undef SIMD_IWRAPPER_1
-#undef SIMD_IWRAPPER_1L
-#undef SIMD_IWRAPPER_1I
-#undef SIMD_IWRAPPER_1I_
-#undef SIMD_IWRAPPER_2_
-#undef SIMD_IWRAPPER_2
-#undef SIMD_IWRAPPER_2I
-#undef SIMD_IWRAPPER_2I
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl
deleted file mode 100644
index 4c883b11a25..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl
+++ /dev/null
@@ -1,349 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX512_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-//============================================================================
-// SIMD256 AVX (512) implementation
-//
-// Since this implementation inherits from the AVX (2) implementation,
-// the only operations below ones that replace AVX (2) operations.
-// These use native AVX512 instructions with masking to enable a larger
-// register set.
-//============================================================================
-
-private:
-static SIMDINLINE __m512 __conv(Float r)
-{
- return _mm512_castps256_ps512(r.v);
-}
-static SIMDINLINE __m512d __conv(Double r)
-{
- return _mm512_castpd256_pd512(r.v);
-}
-static SIMDINLINE __m512i __conv(Integer r)
-{
- return _mm512_castsi256_si512(r.v);
-}
-static SIMDINLINE Float __conv(__m512 r)
-{
- return _mm512_castps512_ps256(r);
-}
-static SIMDINLINE Double __conv(__m512d r)
-{
- return _mm512_castpd512_pd256(r);
-}
-static SIMDINLINE Integer __conv(__m512i r)
-{
- return _mm512_castsi512_si256(r);
-}
-
-public:
-#define SIMD_WRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
- }
-#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xff))
-
-#define SIMD_WRAPPER_1I_(op, intrin, mask) \
- template <int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
- }
-#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xff))
-
-#define SIMD_WRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
- }
-#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xff))
-
-#define SIMD_WRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- { \
- return __conv(_mm512_maskz_##op(0xff, __conv(a), __conv(b), ImmT)); \
- }
-
-#define SIMD_WRAPPER_3_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c))); \
- }
-#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xff))
-
-#define SIMD_DWRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- { \
- return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
- }
-
-#define SIMD_IWRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
- }
-#define SIMD_IWRAPPER_1_32(op) SIMD_IWRAPPER_1_(op, op, __mmask16(0xff))
-
-#define SIMD_IWRAPPER_1I_(op, intrin, mask) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
- }
-#define SIMD_IWRAPPER_1I_32(op) SIMD_IWRAPPER_1I_(op, op, __mmask16(0xff))
-
-#define SIMD_IWRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
- }
-#define SIMD_IWRAPPER_2_32(op) SIMD_IWRAPPER_2_(op, op, __mmask16(0xff))
-
-#define SIMD_IWRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return __conv(_mm512_maskz_##op(0xff, __conv(a), __conv(b), ImmT)); \
- }
-
-//-----------------------------------------------------------------------
-// Single precision floating point arithmetic operations
-//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps); // return a + b
-SIMD_WRAPPER_2(div_ps); // return a / b
-SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
-SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
-SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps); // return a * b
-SIMD_WRAPPER_1_(rcp_ps, rcp14_ps, __mmask16(0xff)); // return 1.0f / a
-SIMD_WRAPPER_1_(rsqrt_ps, rsqrt14_ps, __mmask16(0xff)); // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps); // return a - b
-
-//-----------------------------------------------------------------------
-// Integer (various width) arithmetic operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1_32(abs_epi32); // return absolute_value(a) (int32)
-SIMD_IWRAPPER_2_32(add_epi32); // return a + b (int32)
-SIMD_IWRAPPER_2_32(max_epi32); // return (a > b) ? a : b (int32)
-SIMD_IWRAPPER_2_32(max_epu32); // return (a > b) ? a : b (uint32)
-SIMD_IWRAPPER_2_32(min_epi32); // return (a < b) ? a : b (int32)
-SIMD_IWRAPPER_2_32(min_epu32); // return (a < b) ? a : b (uint32)
-SIMD_IWRAPPER_2_32(mul_epi32); // return a * b (int32)
-
-// SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8)
-// SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
-
-// return (a * b) & 0xFFFFFFFF
-//
-// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
-// and store the low 32 bits of the intermediate integers in dst.
-SIMD_IWRAPPER_2_32(mullo_epi32);
-SIMD_IWRAPPER_2_32(sub_epi32); // return a - b (int32)
-
-// SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64)
-// SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
-
-//-----------------------------------------------------------------------
-// Logical operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_2_(and_si, and_epi32, __mmask16(0xff)); // return a & b (int)
-SIMD_IWRAPPER_2_(andnot_si, andnot_epi32, __mmask16(0xff)); // return (~a) & b (int)
-SIMD_IWRAPPER_2_(or_si, or_epi32, __mmask16(0xff)); // return a | b (int)
-SIMD_IWRAPPER_2_(xor_si, xor_epi32, __mmask16(0xff)); // return a ^ b (int)
-
-//-----------------------------------------------------------------------
-// Shift operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I_32(slli_epi32); // return a << ImmT
-SIMD_IWRAPPER_2_32(sllv_epi32); // return a << b (uint32)
-SIMD_IWRAPPER_1I_32(srai_epi32); // return a >> ImmT (int32)
-SIMD_IWRAPPER_1I_32(srli_epi32); // return a >> ImmT (uint32)
-SIMD_IWRAPPER_2_32(srlv_epi32); // return a >> b (uint32)
-
-// use AVX2 version
-// SIMD_IWRAPPER_1I_(srli_si, srli_si256); // return a >> (ImmT*8) (uint)
-
-//-----------------------------------------------------------------------
-// Conversion operations (Use AVX2 versions)
-//-----------------------------------------------------------------------
-// SIMD_IWRAPPER_1L(cvtepu8_epi16, 0xffff); // return (int16)a (uint8 --> int16)
-// SIMD_IWRAPPER_1L(cvtepu8_epi32, 0xff); // return (int32)a (uint8 --> int32)
-// SIMD_IWRAPPER_1L(cvtepu16_epi32, 0xff); // return (int32)a (uint16 --> int32)
-// SIMD_IWRAPPER_1L(cvtepu16_epi64, 0xf); // return (int64)a (uint16 --> int64)
-// SIMD_IWRAPPER_1L(cvtepu32_epi64, 0xf); // return (int64)a (uint32 --> int64)
-
-//-----------------------------------------------------------------------
-// Comparison operations (Use AVX2 versions
-//-----------------------------------------------------------------------
-// SIMD_IWRAPPER_2_CMP(cmpeq_epi8); // return a == b (int8)
-// SIMD_IWRAPPER_2_CMP(cmpeq_epi16); // return a == b (int16)
-// SIMD_IWRAPPER_2_CMP(cmpeq_epi32); // return a == b (int32)
-// SIMD_IWRAPPER_2_CMP(cmpeq_epi64); // return a == b (int64)
-// SIMD_IWRAPPER_2_CMP(cmpgt_epi8,); // return a > b (int8)
-// SIMD_IWRAPPER_2_CMP(cmpgt_epi16); // return a > b (int16)
-// SIMD_IWRAPPER_2_CMP(cmpgt_epi32); // return a > b (int32)
-// SIMD_IWRAPPER_2_CMP(cmpgt_epi64); // return a > b (int64)
-//
-// static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer a, Integer b) // return a < b (int32)
-//{
-// return cmpgt_epi32(b, a);
-//}
-
-//-----------------------------------------------------------------------
-// Blend / shuffle / permute operations
-//-----------------------------------------------------------------------
-// SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16
-// and _mm512_packs_epi16 SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation
-// for _mm256_packs_epi32 and _mm512_packs_epi32 SIMD_IWRAPPER_2_8(packus_epi16); // uint16 -->
-// uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-// SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for
-// _mm256_packus_epi32 and _mm512_packus_epi32
-
-// SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32);
-
-// static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz) // return a[swiz[i]] for
-// each 32-bit lane i (float)
-//{
-// return _mm256_permutevar8x32_ps(a, swiz);
-//}
-
-SIMD_IWRAPPER_1I_32(shuffle_epi32);
-// template<int ImmT>
-// static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b)
-//{
-// return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
-//}
-// SIMD_IWRAPPER_2(shuffle_epi8);
-SIMD_IWRAPPER_2_32(unpackhi_epi32);
-SIMD_IWRAPPER_2_32(unpacklo_epi32);
-
-// SIMD_IWRAPPER_2_16(unpackhi_epi16);
-// SIMD_IWRAPPER_2_64(unpackhi_epi64);
-// SIMD_IWRAPPER_2_8(unpackhi_epi8);
-// SIMD_IWRAPPER_2_16(unpacklo_epi16);
-// SIMD_IWRAPPER_2_64(unpacklo_epi64);
-// SIMD_IWRAPPER_2_8(unpacklo_epi8);
-
-//-----------------------------------------------------------------------
-// Load / store operations
-//-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL
- load_ps(float const* p) // return *p (loads SIMD width elements from memory)
-{
- return __conv(_mm512_maskz_loadu_ps(__mmask16(0xff), p));
-}
-
-static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
-{
- return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xff), p));
-}
-
-static SIMDINLINE Float SIMDCALL
- loadu_ps(float const* p) // return *p (same as load_ps but allows for unaligned mem)
-{
- return __conv(_mm512_maskz_loadu_ps(__mmask16(0xff), p));
-}
-
-static SIMDINLINE Integer SIMDCALL
- loadu_si(Integer const* p) // return *p (same as load_si but allows for unaligned mem)
-{
- return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xff), p));
-}
-
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
-{
- return __conv(_mm512_mask_i32gather_ps(
- _mm512_setzero_ps(), __mmask16(0xff), __conv(idx), p, static_cast<int>(ScaleT)));
-}
-
-// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
-{
- __mmask16 m = 0xff;
- m = _mm512_mask_test_epi32_mask(
- m, _mm512_castps_si512(__conv(mask)), _mm512_set1_epi32(0x80000000));
- return __conv(
- _mm512_mask_i32gather_ps(__conv(old), m, __conv(idx), p, static_cast<int>(ScaleT)));
-}
-
-// static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
-// {
-// __mmask64 m = 0xffffffffull;
-// return static_cast<uint32_t>(
-// _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
-// }
-
-static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer mask, Float src)
-{
- __mmask16 m = 0xff;
- m = _mm512_mask_test_epi32_mask(m, __conv(mask), _mm512_set1_epi32(0x80000000));
- _mm512_mask_storeu_ps(p, m, __conv(src));
-}
-
-static SIMDINLINE void SIMDCALL
- store_ps(float* p, Float a) // *p = a (stores all elements contiguously in memory)
-{
- _mm512_mask_storeu_ps(p, __mmask16(0xff), __conv(a));
-}
-
-static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer a) // *p = a
-{
- _mm512_mask_storeu_epi32(p, __mmask16(0xff), __conv(a));
-}
-
-static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
-{
- return castsi_ps(__conv(_mm512_maskz_set1_epi32(__mmask16(mask & 0xff), -1)));
-}
-
-//=======================================================================
-// Legacy interface (available only in SIMD256 width)
-//=======================================================================
-
-#undef SIMD_WRAPPER_1_
-#undef SIMD_WRAPPER_1
-#undef SIMD_WRAPPER_1I_
-#undef SIMD_WRAPPER_1I
-#undef SIMD_WRAPPER_2_
-#undef SIMD_WRAPPER_2
-#undef SIMD_WRAPPER_2I
-#undef SIMD_WRAPPER_3_
-#undef SIMD_WRAPPER_3
-#undef SIMD_IWRAPPER_1_
-#undef SIMD_IWRAPPER_1_32
-#undef SIMD_IWRAPPER_1I_
-#undef SIMD_IWRAPPER_1I_32
-#undef SIMD_IWRAPPER_2_
-#undef SIMD_IWRAPPER_2_32
-#undef SIMD_IWRAPPER_2I
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl
deleted file mode 100644
index 1acdc7e07ff..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl
+++ /dev/null
@@ -1,129 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX512_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-//============================================================================
-// SIMD256 AVX (512) implementation for Core processors
-//
-// Since this implementation inherits from the AVX (2) implementation,
-// the only operations below ones that replace AVX (2) operations.
-// These use native AVX512 instructions with masking to enable a larger
-// register set.
-//============================================================================
-
-#define SIMD_DWRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Double SIMDCALL op(Double a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
- }
-#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0xf))
-
-#define SIMD_DWRAPPER_1I_(op, intrin, mask) \
- template <int ImmT> \
- static SIMDINLINE Double SIMDCALL op(Double a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
- }
-#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0xf))
-
-#define SIMD_DWRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
- }
-#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0xf))
-
-#define SIMD_IWRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
- }
-#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffffffull))
-#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xffff))
-#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0xf))
-
-#define SIMD_IWRAPPER_1I_(op, intrin, mask) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
- }
-#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffffffull))
-#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xffff))
-#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0xf))
-
-#define SIMD_IWRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
- }
-#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffffffull))
-#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xffff))
-#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0xf))
-
-SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8)
-SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
-SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64)
-SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
-SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and
- // _mm512_packs_epi16
-SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and
- // _mm512_packs_epi32
-SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and
- // _mm512_packus_epi16
-SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and
- // _mm512_packus_epi32
-SIMD_IWRAPPER_2_16(unpackhi_epi16);
-SIMD_IWRAPPER_2_64(unpackhi_epi64);
-SIMD_IWRAPPER_2_8(unpackhi_epi8);
-SIMD_IWRAPPER_2_16(unpacklo_epi16);
-SIMD_IWRAPPER_2_64(unpacklo_epi64);
-SIMD_IWRAPPER_2_8(unpacklo_epi8);
-
-static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
-{
- __mmask64 m = 0xffffffffull;
- return static_cast<uint32_t>(_mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
-}
-
-#undef SIMD_DWRAPPER_1_
-#undef SIMD_DWRAPPER_1
-#undef SIMD_DWRAPPER_1I_
-#undef SIMD_DWRAPPER_1I
-#undef SIMD_DWRAPPER_2_
-#undef SIMD_DWRAPPER_2
-#undef SIMD_DWRAPPER_2I
-#undef SIMD_IWRAPPER_1_
-#undef SIMD_IWRAPPER_1_8
-#undef SIMD_IWRAPPER_1_16
-#undef SIMD_IWRAPPER_1_64
-#undef SIMD_IWRAPPER_1I_
-#undef SIMD_IWRAPPER_1I_8
-#undef SIMD_IWRAPPER_1I_16
-#undef SIMD_IWRAPPER_1I_64
-#undef SIMD_IWRAPPER_2_
-#undef SIMD_IWRAPPER_2_8
-#undef SIMD_IWRAPPER_2_16
-#undef SIMD_IWRAPPER_2_64
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl
deleted file mode 100644
index 52b6ca2b61e..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl
+++ /dev/null
@@ -1,34 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX512_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-//============================================================================
-// SIMD256 AVX (512) implementation for Knights Family
-//
-// Since this implementation inherits from the AVX (2) implementation,
-// the only operations below ones that replace AVX (2) operations.
-// These use native AVX512 instructions with masking to enable a larger
-// register set.
-//============================================================================
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
deleted file mode 100644
index 5053275e8d6..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
+++ /dev/null
@@ -1,699 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX512_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
-// gcc as of 7.1 was missing these intrinsics
-#ifndef _mm512_cmpneq_ps_mask
-#define _mm512_cmpneq_ps_mask(a, b) _mm512_cmp_ps_mask((a), (b), _CMP_NEQ_UQ)
-#endif
-
-#ifndef _mm512_cmplt_ps_mask
-#define _mm512_cmplt_ps_mask(a, b) _mm512_cmp_ps_mask((a), (b), _CMP_LT_OS)
-#endif
-
-#ifndef _mm512_cmplt_pd_mask
-#define _mm512_cmplt_pd_mask(a, b) _mm512_cmp_pd_mask((a), (b), _CMP_LT_OS)
-#endif
-
-#endif
-
-//============================================================================
-// SIMD16 AVX512 (F) implementation (compatible with Knights and Core
-// processors)
-//
-//============================================================================
-
-static const int TARGET_SIMD_WIDTH = 16;
-using SIMD256T = SIMD256Impl::AVX2Impl;
-
-#define SIMD_WRAPPER_1_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a) { return intrin(a); }
-
-#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, _mm512_##op)
-
-#define SIMD_WRAPPER_2_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) { return _mm512_##intrin(a, b); }
-#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op)
-
-#define SIMD_WRAPPERI_2_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- { \
- return _mm512_castsi512_ps( \
- _mm512_##intrin(_mm512_castps_si512(a), _mm512_castps_si512(b))); \
- }
-
-#define SIMD_DWRAPPER_2(op) \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) { return _mm512_##op(a, b); }
-
-#define SIMD_WRAPPER_2I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- { \
- return _mm512_##intrin(a, b, ImmT); \
- }
-#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op)
-
-#define SIMD_DWRAPPER_2I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- { \
- return _mm512_##intrin(a, b, ImmT); \
- }
-#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op)
-
-#define SIMD_WRAPPER_3(op) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm512_##op(a, b, c); }
-
-#define SIMD_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) { return _mm512_##op(a); }
-#define SIMD_IWRAPPER_1_8(op) \
- static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) { return _mm512_##op(a); }
-
-#define SIMD_IWRAPPER_1_4(op) \
- static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) { return _mm512_##op(a); }
-
-#define SIMD_IWRAPPER_1I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- { \
- return intrin(a, ImmT); \
- }
-#define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op)
-
-#define SIMD_IWRAPPER_2_(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return _mm512_##intrin(a, b); }
-#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op)
-
-#define SIMD_IWRAPPER_2_CMP(op, cmp) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return cmp(a, b); }
-
-#define SIMD_IFWRAPPER_2(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b))); \
- }
-
-#define SIMD_IWRAPPER_2I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return _mm512_##intrin(a, b, ImmT); \
- }
-#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
-
-private:
-static SIMDINLINE Integer vmask(__mmask16 m)
-{
- return _mm512_maskz_set1_epi32(m, -1);
-}
-
-static SIMDINLINE Integer vmask(__mmask8 m)
-{
- return _mm512_maskz_set1_epi64(m, -1LL);
-}
-
-public:
-//-----------------------------------------------------------------------
-// Single precision floating point arithmetic operations
-//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps); // return a + b
-SIMD_WRAPPER_2(div_ps); // return a / b
-SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
-SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
-SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps); // return a * b
-SIMD_WRAPPER_1_(rcp_ps, _mm512_rcp14_ps); // return 1.0f / a
-SIMD_WRAPPER_1_(rsqrt_ps, _mm512_rsqrt14_ps); // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps); // return a - b
-
-template <RoundMode RMT>
-static SIMDINLINE Float SIMDCALL round_ps(Float a)
-{
- return _mm512_roundscale_ps(a, static_cast<int>(RMT));
-}
-
-static SIMDINLINE Float SIMDCALL ceil_ps(Float a)
-{
- return round_ps<RoundMode::CEIL_NOEXC>(a);
-}
-static SIMDINLINE Float SIMDCALL floor_ps(Float a)
-{
- return round_ps<RoundMode::FLOOR_NOEXC>(a);
-}
-
-//-----------------------------------------------------------------------
-// Integer (various width) arithmetic operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
-SIMD_IWRAPPER_2(add_epi32); // return a + b (int32)
-// SIMD_IWRAPPER_2(add_epi8); // return a + b (int8)
-// SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
-SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
-SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
-SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
-SIMD_IWRAPPER_2(min_epu32); // return (a < b) ? a : b (uint32)
-SIMD_IWRAPPER_2(mul_epi32); // return a * b (int32)
-
-// return (a * b) & 0xFFFFFFFF
-//
-// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
-// and store the low 32 bits of the intermediate integers in dst.
-SIMD_IWRAPPER_2(mullo_epi32);
-SIMD_IWRAPPER_2(sub_epi32); // return a - b (int32)
-SIMD_IWRAPPER_2(sub_epi64); // return a - b (int64)
-// SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
-
-//-----------------------------------------------------------------------
-// Logical operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_2_(and_si, and_si512); // return a & b (int)
-SIMD_IWRAPPER_2_(andnot_si, andnot_si512); // return (~a) & b (int)
-SIMD_IWRAPPER_2_(or_si, or_si512); // return a | b (int)
-SIMD_IWRAPPER_2_(xor_si, xor_si512); // return a ^ b (int)
-
-// SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int)
-// SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int)
-// SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int)
-// SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int)
-
-//-----------------------------------------------------------------------
-// Shift operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I(slli_epi32); // return a << ImmT
-SIMD_IWRAPPER_2(sllv_epi32);
-SIMD_IWRAPPER_1I(srai_epi32); // return a >> ImmT (int32)
-SIMD_IWRAPPER_1I(srli_epi32); // return a >> ImmT (uint32)
-
-#if 0
-SIMD_IWRAPPER_1I_(srli_si, srli_si512); // return a >> (ImmT*8) (uint)
-
-template<int ImmT> // same as srli_si, but with Float cast to int
-static SIMDINLINE Float SIMDCALL srlisi_ps(Float a)
-{
- return castsi_ps(srli_si<ImmT>(castps_si(a)));
-}
-#endif
-
-SIMD_IWRAPPER_2(srlv_epi32);
-
-//-----------------------------------------------------------------------
-// Conversion operations
-//-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL castpd_ps(Double a) // return *(Float*)(&a)
-{
- return _mm512_castpd_ps(a);
-}
-
-static SIMDINLINE Integer SIMDCALL castps_si(Float a) // return *(Integer*)(&a)
-{
- return _mm512_castps_si512(a);
-}
-
-static SIMDINLINE Double SIMDCALL castsi_pd(Integer a) // return *(Double*)(&a)
-{
- return _mm512_castsi512_pd(a);
-}
-
-static SIMDINLINE Double SIMDCALL castps_pd(Float a) // return *(Double*)(&a)
-{
- return _mm512_castps_pd(a);
-}
-
-static SIMDINLINE Integer SIMDCALL castpd_si(Double a) // return *(Integer*)(&a)
-{
- return _mm512_castpd_si512(a);
-}
-
-static SIMDINLINE Float SIMDCALL castsi_ps(Integer a) // return *(Float*)(&a)
-{
- return _mm512_castsi512_ps(a);
-}
-
-static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer a) // return (float)a (int32 --> float)
-{
- return _mm512_cvtepi32_ps(a);
-}
-
-// SIMD_IWRAPPER_1_8(cvtepu8_epi16); // return (int16)a (uint8 --> int16)
-SIMD_IWRAPPER_1_4(cvtepu8_epi32); // return (int32)a (uint8 --> int32)
-SIMD_IWRAPPER_1_8(cvtepu16_epi32); // return (int32)a (uint16 --> int32)
-SIMD_IWRAPPER_1_4(cvtepu16_epi64); // return (int64)a (uint16 --> int64)
-SIMD_IWRAPPER_1_8(cvtepu32_epi64); // return (int64)a (uint32 --> int64)
-
-static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float a) // return (int32)a (float --> int32)
-{
- return _mm512_cvtps_epi32(a);
-}
-
-static SIMDINLINE Integer SIMDCALL
- cvttps_epi32(Float a) // return (int32)a (rnd_to_zero(float) --> int32)
-{
- return _mm512_cvttps_epi32(a);
-}
-
-//-----------------------------------------------------------------------
-// Comparison operations
-//-----------------------------------------------------------------------
-template <CompareType CmpTypeT>
-static SIMDINLINE Mask SIMDCALL cmp_ps_mask(Float a, Float b)
-{
- return _mm512_cmp_ps_mask(a, b, static_cast<const int>(CmpTypeT));
-}
-
-template <CompareType CmpTypeT>
-static SIMDINLINE Float SIMDCALL cmp_ps(Float a, Float b) // return a (CmpTypeT) b
-{
- // Legacy vector mask generator
- __mmask16 result = cmp_ps_mask<CmpTypeT>(a, b);
- return castsi_ps(vmask(result));
-}
-
-static SIMDINLINE Float SIMDCALL cmplt_ps(Float a, Float b)
-{
- return cmp_ps<CompareType::LT_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpgt_ps(Float a, Float b)
-{
- return cmp_ps<CompareType::GT_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpneq_ps(Float a, Float b)
-{
- return cmp_ps<CompareType::NEQ_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpeq_ps(Float a, Float b)
-{
- return cmp_ps<CompareType::EQ_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float b)
-{
- return cmp_ps<CompareType::GE_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmple_ps(Float a, Float b)
-{
- return cmp_ps<CompareType::LE_OQ>(a, b);
-}
-
-template <CompareTypeInt CmpTypeT>
-static SIMDINLINE Integer SIMDCALL cmp_epi32(Integer a, Integer b)
-{
- // Legacy vector mask generator
- __mmask16 result = _mm512_cmp_epi32_mask(a, b, static_cast<const int>(CmpTypeT));
- return vmask(result);
-}
-template <CompareTypeInt CmpTypeT>
-static SIMDINLINE Integer SIMDCALL cmp_epi64(Integer a, Integer b)
-{
- // Legacy vector mask generator
- __mmask8 result = _mm512_cmp_epi64_mask(a, b, static_cast<const int>(CmpTypeT));
- return vmask(result);
-}
-
-// SIMD_IWRAPPER_2_CMP(cmpeq_epi8, cmp_epi8<CompareTypeInt::EQ>); // return a == b (int8)
-// SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>); // return a == b (int16)
-SIMD_IWRAPPER_2_CMP(cmpeq_epi32, cmp_epi32<CompareTypeInt::EQ>); // return a == b (int32)
-SIMD_IWRAPPER_2_CMP(cmpeq_epi64, cmp_epi64<CompareTypeInt::EQ>); // return a == b (int64)
-// SIMD_IWRAPPER_2_CMP(cmpgt_epi8, cmp_epi8<CompareTypeInt::GT>); // return a > b (int8)
-// SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>); // return a > b (int16)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi32, cmp_epi32<CompareTypeInt::GT>); // return a > b (int32)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi64, cmp_epi64<CompareTypeInt::GT>); // return a > b (int64)
-SIMD_IWRAPPER_2_CMP(cmplt_epi32, cmp_epi32<CompareTypeInt::LT>); // return a < b (int32)
-
-static SIMDINLINE bool SIMDCALL testz_ps(Float a,
- Float b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
-{
- return (0 == static_cast<int>(_mm512_test_epi32_mask(castps_si(a), castps_si(b))));
-}
-
-static SIMDINLINE bool SIMDCALL testz_si(Integer a,
- Integer b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
-{
- return (0 == static_cast<int>(_mm512_test_epi32_mask(a, b)));
-}
-
-//-----------------------------------------------------------------------
-// Blend / shuffle / permute operations
-//-----------------------------------------------------------------------
-template <int ImmT>
-static SIMDINLINE Float blend_ps(Float a, Float b) // return ImmT ? b : a (float)
-{
- return _mm512_mask_blend_ps(__mmask16(ImmT), a, b);
-}
-
-template <int ImmT>
-static SIMDINLINE Integer blend_epi32(Integer a, Integer b) // return ImmT ? b : a (int32)
-{
- return _mm512_mask_blend_epi32(__mmask16(ImmT), a, b);
-}
-
-static SIMDINLINE Float blendv_ps(Float a, Float b, Float mask) // return mask ? b : a (float)
-{
- return _mm512_mask_blend_ps(__mmask16(movemask_ps(mask)), a, b);
-}
-
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a,
- Integer b,
- Float mask) // return mask ? b : a (int)
-{
- return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), mask));
-}
-
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a,
- Integer b,
- Integer mask) // return mask ? b : a (int)
-{
- return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), castsi_ps(mask)));
-}
-
-static SIMDINLINE Float SIMDCALL
- broadcast_ss(float const* p) // return *p (all elements in vector get same value)
-{
- return _mm512_set1_ps(*p);
-}
-
-template <int imm>
-static SIMDINLINE SIMD256Impl::Float SIMDCALL extract_ps(Float a)
-{
- return _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(a), imm));
-}
-
-template <int imm>
-static SIMDINLINE SIMD256Impl::Double SIMDCALL extract_pd(Double a)
-{
- return _mm512_extractf64x4_pd(a, imm);
-}
-
-template <int imm>
-static SIMDINLINE SIMD256Impl::Integer SIMDCALL extract_si(Integer a)
-{
- return _mm512_extracti64x4_epi64(a, imm);
-}
-
-template <int imm>
-static SIMDINLINE Float SIMDCALL insert_ps(Float a, SIMD256Impl::Float b)
-{
- return _mm512_castpd_ps(_mm512_insertf64x4(_mm512_castps_pd(a), _mm256_castps_pd(b), imm));
-}
-
-template <int imm>
-static SIMDINLINE Double SIMDCALL insert_pd(Double a, SIMD256Impl::Double b)
-{
- return _mm512_insertf64x4(a, b, imm);
-}
-
-template <int imm>
-static SIMDINLINE Integer SIMDCALL insert_si(Integer a, SIMD256Impl::Integer b)
-{
- return _mm512_inserti64x4(a, b, imm);
-}
-
-// SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm512_packs_epi16 and
-// _mm512_packs_epi16 SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm512_packs_epi32
-// and _mm512_packs_epi32 SIMD_IWRAPPER_2(packus_epi16); // See documentation for
-// _mm512_packus_epi16 and _mm512_packus_epi16 SIMD_IWRAPPER_2(packus_epi32); // See documentation
-// for _mm512_packus_epi32 and _mm512_packus_epi32
-
-template <int ImmT>
-static SIMDINLINE Float SIMDCALL permute_ps(Float const& a)
-{
- return _mm512_permute_ps(a, ImmT);
-}
-
-static SIMDINLINE Integer SIMDCALL
- permute_epi32(Integer a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
-{
- return _mm512_permutexvar_epi32(swiz, a);
-}
-
-static SIMDINLINE Float SIMDCALL
- permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
-{
- return _mm512_permutexvar_ps(swiz, a);
-}
-
-SIMD_WRAPPER_2I_(permute2f128_ps, shuffle_f32x4);
-SIMD_DWRAPPER_2I_(permute2f128_pd, shuffle_f64x2);
-SIMD_IWRAPPER_2I_(permute2f128_si, shuffle_i32x4);
-
-SIMD_IWRAPPER_1I(shuffle_epi32);
-
-// SIMD_IWRAPPER_2(shuffle_epi8);
-SIMD_DWRAPPER_2I(shuffle_pd);
-SIMD_WRAPPER_2I(shuffle_ps);
-
-template <int ImmT>
-static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b)
-{
- return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
-}
-
-SIMD_IWRAPPER_2(unpackhi_epi16);
-
-// SIMD_IFWRAPPER_2(unpackhi_epi32, _mm512_unpackhi_ps);
-static SIMDINLINE Integer SIMDCALL unpackhi_epi32(Integer a, Integer b)
-{
- return castps_si(_mm512_unpackhi_ps(castsi_ps(a), castsi_ps(b)));
-}
-
-SIMD_IWRAPPER_2(unpackhi_epi64);
-// SIMD_IWRAPPER_2(unpackhi_epi8);
-SIMD_DWRAPPER_2(unpackhi_pd);
-SIMD_WRAPPER_2(unpackhi_ps);
-// SIMD_IWRAPPER_2(unpacklo_epi16);
-SIMD_IFWRAPPER_2(unpacklo_epi32, unpacklo_ps);
-SIMD_IWRAPPER_2(unpacklo_epi64);
-// SIMD_IWRAPPER_2(unpacklo_epi8);
-SIMD_DWRAPPER_2(unpacklo_pd);
-SIMD_WRAPPER_2(unpacklo_ps);
-
-//-----------------------------------------------------------------------
-// Load / store operations
-//-----------------------------------------------------------------------
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
-{
- return _mm512_i32gather_ps(idx, p, static_cast<int>(ScaleT));
-}
-
-static SIMDINLINE Float SIMDCALL
- load1_ps(float const* p) // return *p (broadcast 1 value to all elements)
-{
- return broadcast_ss(p);
-}
-
-static SIMDINLINE Float SIMDCALL
- load_ps(float const* p) // return *p (loads SIMD width elements from memory)
-{
- return _mm512_load_ps(p);
-}
-
-static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
-{
- return _mm512_load_si512(&p->v);
-}
-
-static SIMDINLINE Float SIMDCALL
- loadu_ps(float const* p) // return *p (same as load_ps but allows for unaligned mem)
-{
- return _mm512_loadu_ps(p);
-}
-
-static SIMDINLINE Integer SIMDCALL
- loadu_si(Integer const* p) // return *p (same as load_si but allows for unaligned mem)
-{
- return _mm512_loadu_si512(p);
-}
-
-// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
-{
- __mmask16 k = _mm512_test_epi32_mask(castps_si(mask), set1_epi32(0x80000000));
-
- return _mm512_mask_i32gather_ps(old, k, idx, p, static_cast<int>(ScaleT));
-}
-
-static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer mask, Float src)
-{
- Mask m = _mm512_cmplt_epi32_mask(mask, setzero_si());
- _mm512_mask_store_ps(p, m, src);
-}
-
-// static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a)
-//{
-// __mmask64 m = _mm512_cmplt_epi8_mask(a, setzero_si());
-// return static_cast<uint64_t>(m);
-//}
-
-static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double a)
-{
- __mmask8 m = _mm512_test_epi64_mask(castpd_si(a), set1_epi64(0x8000000000000000LL));
- return static_cast<uint32_t>(m);
-}
-static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float a)
-{
- __mmask16 m = _mm512_test_epi32_mask(castps_si(a), set1_epi32(0x80000000));
- return static_cast<uint32_t>(m);
-}
-
-static SIMDINLINE Integer SIMDCALL set1_epi64(long long i) // return i (all elements are same value)
-{
- return _mm512_set1_epi64(i);
-}
-
-static SIMDINLINE Integer SIMDCALL set1_epi32(int i) // return i (all elements are same value)
-{
- return _mm512_set1_epi32(i);
-}
-
-static SIMDINLINE Integer SIMDCALL set1_epi8(char i) // return i (all elements are same value)
-{
- return _mm512_set1_epi8(i);
-}
-
-static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
-{
- return _mm512_set1_ps(f);
-}
-
-static SIMDINLINE Double SIMDCALL setzero_pd() // return 0 (double)
-{
- return _mm512_setzero_pd();
-}
-
-static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
-{
- return _mm512_setzero_ps();
-}
-
-static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
-{
- return _mm512_setzero_si512();
-}
-
-static SIMDINLINE void SIMDCALL
- store_ps(float* p, Float a) // *p = a (stores all elements contiguously in memory)
-{
- _mm512_store_ps(p, a);
-}
-
-static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer a) // *p = a
-{
- _mm512_store_si512(&p->v, a);
-}
-
-static SIMDINLINE void SIMDCALL
- storeu_si(Integer* p, Integer a) // *p = a (same as store_si but allows for unaligned mem)
-{
- _mm512_storeu_si512(&p->v, a);
-}
-
-static SIMDINLINE void SIMDCALL
- stream_ps(float* p, Float a) // *p = a (same as store_ps, but doesn't keep memory in cache)
-{
- _mm512_stream_ps(p, a);
-}
-
-static SIMDINLINE Integer SIMDCALL set_epi32(int i15,
- int i14,
- int i13,
- int i12,
- int i11,
- int i10,
- int i9,
- int i8,
- int i7,
- int i6,
- int i5,
- int i4,
- int i3,
- int i2,
- int i1,
- int i0)
-{
- return _mm512_set_epi32(i15, i14, i13, i12, i11, i10, i9, i8, i7, i6, i5, i4, i3, i2, i1, i0);
-}
-
-static SIMDINLINE Integer SIMDCALL
- set_epi32(int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
-{
- return set_epi32(0, 0, 0, 0, 0, 0, 0, 0, i7, i6, i5, i4, i3, i2, i1, i0);
-}
-
-static SIMDINLINE Float SIMDCALL set_ps(float i15,
- float i14,
- float i13,
- float i12,
- float i11,
- float i10,
- float i9,
- float i8,
- float i7,
- float i6,
- float i5,
- float i4,
- float i3,
- float i2,
- float i1,
- float i0)
-{
- return _mm512_set_ps(i15, i14, i13, i12, i11, i10, i9, i8, i7, i6, i5, i4, i3, i2, i1, i0);
-}
-
-static SIMDINLINE Float SIMDCALL
- set_ps(float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
-{
- return set_ps(0, 0, 0, 0, 0, 0, 0, 0, i7, i6, i5, i4, i3, i2, i1, i0);
-}
-
-static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
-{
- return castsi_ps(_mm512_maskz_mov_epi32(__mmask16(mask), set1_epi32(-1)));
-}
-
-#undef SIMD_WRAPPER_1_
-#undef SIMD_WRAPPER_1
-#undef SIMD_WRAPPER_2
-#undef SIMD_WRAPPER_2_
-#undef SIMD_WRAPPERI_2_
-#undef SIMD_DWRAPPER_2
-#undef SIMD_DWRAPPER_2I
-#undef SIMD_WRAPPER_2I_
-#undef SIMD_WRAPPER_3_
-#undef SIMD_WRAPPER_2I
-#undef SIMD_WRAPPER_3
-#undef SIMD_IWRAPPER_1
-#undef SIMD_IWRAPPER_2
-#undef SIMD_IFWRAPPER_2
-#undef SIMD_IWRAPPER_2I
-#undef SIMD_IWRAPPER_1
-#undef SIMD_IWRAPPER_1I
-#undef SIMD_IWRAPPER_1I_
-#undef SIMD_IWRAPPER_2
-#undef SIMD_IWRAPPER_2_
-#undef SIMD_IWRAPPER_2I
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl
deleted file mode 100644
index 82aa2bb4173..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl
+++ /dev/null
@@ -1,186 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX512_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-//============================================================================
-// SIMD16 AVX512 (F) implementation for Core processors
-//
-//============================================================================
-
-#define SIMD_WRAPPER_1_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a) { return intrin(a); }
-
-#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, _mm512_##op)
-
-#define SIMD_WRAPPER_2_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) { return _mm512_##intrin(a, b); }
-#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op)
-
-#define SIMD_WRAPPERI_2_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- { \
- return _mm512_castsi512_ps( \
- _mm512_##intrin(_mm512_castps_si512(a), _mm512_castps_si512(b))); \
- }
-
-#define SIMD_DWRAPPER_2(op) \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) { return _mm512_##op(a, b); }
-
-#define SIMD_WRAPPER_2I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- { \
- return _mm512_##intrin(a, b, ImmT); \
- }
-#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op)
-
-#define SIMD_DWRAPPER_2I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- { \
- return _mm512_##intrin(a, b, ImmT); \
- }
-#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op)
-
-#define SIMD_WRAPPER_3(op) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm512_##op(a, b, c); }
-
-#define SIMD_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) { return _mm512_##op(a); }
-#define SIMD_IWRAPPER_1_8(op) \
- static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) { return _mm512_##op(a); }
-
-#define SIMD_IWRAPPER_1_4(op) \
- static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) { return _mm512_##op(a); }
-
-#define SIMD_IWRAPPER_1I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- { \
- return intrin(a, ImmT); \
- }
-#define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op)
-
-#define SIMD_IWRAPPER_2_(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return _mm512_##intrin(a, b); }
-#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op)
-
-#define SIMD_IWRAPPER_2_CMP(op, cmp) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return cmp(a, b); }
-
-#define SIMD_IFWRAPPER_2(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b))); \
- }
-
-#define SIMD_IWRAPPER_2I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return _mm512_##intrin(a, b, ImmT); \
- }
-#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
-
-private:
-static SIMDINLINE Integer vmask(__mmask32 m)
-{
- return _mm512_maskz_set1_epi16(m, -1);
-}
-static SIMDINLINE Integer vmask(__mmask64 m)
-{
- return _mm512_maskz_set1_epi8(m, -1);
-}
-
-public:
-SIMD_IWRAPPER_2(add_epi8); // return a + b (int8)
-SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
-SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
-
-SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int)
-SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int)
-SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int)
-SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int)
-
-SIMD_IWRAPPER_1_8(cvtepu8_epi16); // return (int16)a (uint8 --> int16)
-
-template <CompareTypeInt CmpTypeT>
-static SIMDINLINE Integer SIMDCALL cmp_epi8(Integer a, Integer b)
-{
- // Legacy vector mask generator
- __mmask64 result = _mm512_cmp_epi8_mask(a, b, static_cast<const int>(CmpTypeT));
- return vmask(result);
-}
-template <CompareTypeInt CmpTypeT>
-static SIMDINLINE Integer SIMDCALL cmp_epi16(Integer a, Integer b)
-{
- // Legacy vector mask generator
- __mmask32 result = _mm512_cmp_epi16_mask(a, b, static_cast<const int>(CmpTypeT));
- return vmask(result);
-}
-
-SIMD_IWRAPPER_2_CMP(cmpeq_epi8, cmp_epi8<CompareTypeInt::EQ>); // return a == b (int8)
-SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>); // return a == b (int16)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi8, cmp_epi8<CompareTypeInt::GT>); // return a > b (int8)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>); // return a > b (int16)
-
-SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm512_packs_epi16
-SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm512_packs_epi32
-SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm512_packus_epi16
-SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm512_packus_epi32
-
-SIMD_IWRAPPER_2(unpackhi_epi8); // See documentation for _mm512_unpackhi_epi8
-SIMD_IWRAPPER_2(unpacklo_epi16); // See documentation for _mm512_unpacklo_epi16
-SIMD_IWRAPPER_2(unpacklo_epi8); // See documentation for _mm512_unpacklo_epi8
-
-SIMD_IWRAPPER_2(shuffle_epi8);
-
-static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a)
-{
- __mmask64 m = _mm512_cmplt_epi8_mask(a, setzero_si());
- return static_cast<uint64_t>(m);
-}
-
-#undef SIMD_WRAPPER_1_
-#undef SIMD_WRAPPER_1
-#undef SIMD_WRAPPER_2
-#undef SIMD_WRAPPER_2_
-#undef SIMD_WRAPPERI_2_
-#undef SIMD_DWRAPPER_2
-#undef SIMD_DWRAPPER_2I
-#undef SIMD_WRAPPER_2I_
-#undef SIMD_WRAPPER_3_
-#undef SIMD_WRAPPER_2I
-#undef SIMD_WRAPPER_3
-#undef SIMD_IWRAPPER_1
-#undef SIMD_IWRAPPER_2
-#undef SIMD_IFWRAPPER_2
-#undef SIMD_IWRAPPER_2I
-#undef SIMD_IWRAPPER_1
-#undef SIMD_IWRAPPER_1I
-#undef SIMD_IWRAPPER_1I_
-#undef SIMD_IWRAPPER_2
-#undef SIMD_IWRAPPER_2_
-#undef SIMD_IWRAPPER_2I
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl
deleted file mode 100644
index 9ec3ff6c6b1..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl
+++ /dev/null
@@ -1,132 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX512_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-//============================================================================
-// SIMD16 AVX512 (F) implementation for Knights Family Processors
-//
-//============================================================================
-
-#define SIMD_WRAPPER_1_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a) { return intrin(a); }
-
-#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, _mm512_##op)
-
-#define SIMD_WRAPPER_2_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) { return _mm512_##intrin(a, b); }
-#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op)
-
-#define SIMD_WRAPPERI_2_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- { \
- return _mm512_castsi512_ps( \
- _mm512_##intrin(_mm512_castps_si512(a), _mm512_castps_si512(b))); \
- }
-
-#define SIMD_DWRAPPER_2(op) \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) { return _mm512_##op(a, b); }
-
-#define SIMD_WRAPPER_2I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- { \
- return _mm512_##intrin(a, b, ImmT); \
- }
-#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op)
-
-#define SIMD_DWRAPPER_2I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- { \
- return _mm512_##intrin(a, b, ImmT); \
- }
-#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op)
-
-#define SIMD_WRAPPER_3(op) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm512_##op(a, b, c); }
-
-#define SIMD_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) { return _mm512_##op(a); }
-#define SIMD_IWRAPPER_1_8(op) \
- static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) { return _mm512_##op(a); }
-
-#define SIMD_IWRAPPER_1_4(op) \
- static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) { return _mm512_##op(a); }
-
-#define SIMD_IWRAPPER_1I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- { \
- return intrin(a, ImmT); \
- }
-#define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op)
-
-#define SIMD_IWRAPPER_2_(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return _mm512_##intrin(a, b); }
-#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op)
-
-#define SIMD_IWRAPPER_2_CMP(op, cmp) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return cmp(a, b); }
-
-#define SIMD_IFWRAPPER_2(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b))); \
- }
-
-#define SIMD_IWRAPPER_2I_(op, intrin) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- { \
- return _mm512_##intrin(a, b, ImmT); \
- }
-#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
-
-SIMD_WRAPPERI_2_(and_ps, and_epi32); // return a & b (float treated as int)
-SIMD_WRAPPERI_2_(andnot_ps, andnot_epi32); // return (~a) & b (float treated as int)
-SIMD_WRAPPERI_2_(or_ps, or_epi32); // return a | b (float treated as int)
-SIMD_WRAPPERI_2_(xor_ps, xor_epi32); // return a ^ b (float treated as int)
-
-#undef SIMD_WRAPPER_1_
-#undef SIMD_WRAPPER_1
-#undef SIMD_WRAPPER_2
-#undef SIMD_WRAPPER_2_
-#undef SIMD_WRAPPERI_2_
-#undef SIMD_DWRAPPER_2
-#undef SIMD_DWRAPPER_2I
-#undef SIMD_WRAPPER_2I_
-#undef SIMD_WRAPPER_3_
-#undef SIMD_WRAPPER_2I
-#undef SIMD_WRAPPER_3
-#undef SIMD_IWRAPPER_1
-#undef SIMD_IWRAPPER_2
-#undef SIMD_IFWRAPPER_2
-#undef SIMD_IWRAPPER_2I
-#undef SIMD_IWRAPPER_1
-#undef SIMD_IWRAPPER_1I
-#undef SIMD_IWRAPPER_1I_
-#undef SIMD_IWRAPPER_2
-#undef SIMD_IWRAPPER_2_
-#undef SIMD_IWRAPPER_2I
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks.inl
deleted file mode 100644
index f9d4b8c3902..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks.inl
+++ /dev/null
@@ -1,27 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX512_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-// Implement mask-enabled SIMD functions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl
deleted file mode 100644
index f9d4b8c3902..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl
+++ /dev/null
@@ -1,27 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX512_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-// Implement mask-enabled SIMD functions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl
deleted file mode 100644
index f9d4b8c3902..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl
+++ /dev/null
@@ -1,27 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX512_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-// Implement mask-enabled SIMD functions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl
deleted file mode 100644
index ec905505dc4..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl
+++ /dev/null
@@ -1,852 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-//============================================================================
-// SIMD16 AVX (1) implementation
-//============================================================================
-
-static const int TARGET_SIMD_WIDTH = 8;
-using SIMD128T = SIMD128Impl::AVXImpl;
-
-#define SIMD_WRAPPER_1(op) \
- static SIMDINLINE Float SIMDCALL op(Float const& a) \
- { \
- return Float{ \
- SIMD256T::op(a.v8[0]), \
- SIMD256T::op(a.v8[1]), \
- }; \
- }
-
-#define SIMD_WRAPPER_2(op) \
- static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b) \
- { \
- return Float{ \
- SIMD256T::op(a.v8[0], b.v8[0]), \
- SIMD256T::op(a.v8[1], b.v8[1]), \
- }; \
- }
-
-#define SIMD_WRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b) \
- { \
- return Float{ \
- SIMD256T::template op<0xFF & ImmT>(a.v8[0], b.v8[0]), \
- SIMD256T::template op<0xFF & (ImmT >> TARGET_SIMD_WIDTH)>(a.v8[1], b.v8[1]), \
- }; \
- }
-
-#define SIMD_WRAPPER_2I_1(op) \
- template <int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b) \
- { \
- return Float{ \
- SIMD256T::template op<ImmT>(a.v8[0], b.v8[0]), \
- SIMD256T::template op<ImmT>(a.v8[1], b.v8[1]), \
- }; \
- }
-
-#define SIMD_WRAPPER_3(op) \
- static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b, Float const& c) \
- { \
- return Float{ \
- SIMD256T::op(a.v8[0], b.v8[0], c.v8[0]), \
- SIMD256T::op(a.v8[1], b.v8[1], c.v8[1]), \
- }; \
- }
-
-#define SIMD_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
- { \
- return Integer{ \
- SIMD256T::op(a.v8[0]), \
- SIMD256T::op(a.v8[1]), \
- }; \
- }
-
-#define SIMD_IWRAPPER_2(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
- { \
- return Integer{ \
- SIMD256T::op(a.v8[0], b.v8[0]), \
- SIMD256T::op(a.v8[1], b.v8[1]), \
- }; \
- }
-
-#define SIMD_IWRAPPER_2I(op) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
- { \
- return Integer{ \
- SIMD256T::template op<0xFF & ImmT>(a.v8[0], b.v8[0]), \
- SIMD256T::template op<0xFF & (ImmT >> TARGET_SIMD_WIDTH)>(a.v8[1], b.v8[1]), \
- }; \
- }
-
-#define SIMD_IWRAPPER_2I_1(op) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
- { \
- return Integer{ \
- SIMD256T::template op<ImmT>(a.v8[0], b.v8[0]), \
- SIMD256T::template op<ImmT>(a.v8[1], b.v8[1]), \
- }; \
- }
-
-#define SIMD_IWRAPPER_2I_2(op) \
- template <int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
- { \
- return Integer{ \
- SIMD256T::template op<0xF & ImmT>(a.v8[0], b.v8[0]), \
- SIMD256T::template op<0xF & (ImmT >> 4)>(a.v8[1], b.v8[1]), \
- }; \
- }
-
-#define SIMD_IWRAPPER_3(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b, Integer const& c) \
- { \
- return Integer{ \
- SIMD256T::op(a.v8[0], b.v8[0], c.v8[0]), \
- SIMD256T::op(a.v8[1], b.v8[1], c.v8[1]), \
- }; \
- }
-
-//-----------------------------------------------------------------------
-// Single precision floating point arithmetic operations
-//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps); // return a + b
-SIMD_WRAPPER_2(div_ps); // return a / b
-SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
-SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
-SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps); // return a * b
-SIMD_WRAPPER_1(rcp_ps); // return 1.0f / a
-SIMD_WRAPPER_1(rsqrt_ps); // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps); // return a - b
-
-template <RoundMode RMT>
-static SIMDINLINE Float SIMDCALL round_ps(Float const& a)
-{
- return Float{
- SIMD256T::template round_ps<RMT>(a.v8[0]),
- SIMD256T::template round_ps<RMT>(a.v8[1]),
- };
-}
-
-static SIMDINLINE Float SIMDCALL ceil_ps(Float const& a)
-{
- return round_ps<RoundMode::CEIL_NOEXC>(a);
-}
-static SIMDINLINE Float SIMDCALL floor_ps(Float const& a)
-{
- return round_ps<RoundMode::FLOOR_NOEXC>(a);
-}
-
-//-----------------------------------------------------------------------
-// Integer (various width) arithmetic operations
-//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
-SIMD_IWRAPPER_2(add_epi32); // return a + b (int32)
-SIMD_IWRAPPER_2(add_epi8); // return a + b (int8)
-SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
-SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
-SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
-SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
-SIMD_IWRAPPER_2(min_epu32); // return (a < b) ? a : b (uint32)
-SIMD_IWRAPPER_2(mul_epi32); // return a * b (int32)
-
-// return (a * b) & 0xFFFFFFFF
-//
-// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
-// and store the low 32 bits of the intermediate integers in dst.
-SIMD_IWRAPPER_2(mullo_epi32);
-SIMD_IWRAPPER_2(sub_epi32); // return a - b (int32)
-SIMD_IWRAPPER_2(sub_epi64); // return a - b (int64)
-SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
-
-//-----------------------------------------------------------------------
-// Logical operations
-//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int)
-SIMD_IWRAPPER_2(and_si); // return a & b (int)
-SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int)
-SIMD_IWRAPPER_2(andnot_si); // return (~a) & b (int)
-SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int)
-SIMD_IWRAPPER_2(or_si); // return a | b (int)
-SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int)
-SIMD_IWRAPPER_2(xor_si); // return a ^ b (int)
-
-//-----------------------------------------------------------------------
-// Shift operations
-//-----------------------------------------------------------------------
-template <int ImmT>
-static SIMDINLINE Integer SIMDCALL slli_epi32(Integer const& a) // return a << ImmT
-{
- return Integer{
- SIMD256T::template slli_epi32<ImmT>(a.v8[0]),
- SIMD256T::template slli_epi32<ImmT>(a.v8[1]),
- };
-}
-
-SIMD_IWRAPPER_2(sllv_epi32); // return a << b (uint32)
-
-template <int ImmT>
-static SIMDINLINE Integer SIMDCALL srai_epi32(Integer const& a) // return a >> ImmT (int32)
-{
- return Integer{
- SIMD256T::template srai_epi32<ImmT>(a.v8[0]),
- SIMD256T::template srai_epi32<ImmT>(a.v8[1]),
- };
-}
-
-template <int ImmT>
-static SIMDINLINE Integer SIMDCALL srli_epi32(Integer const& a) // return a >> ImmT (uint32)
-{
- return Integer{
- SIMD256T::template srli_epi32<ImmT>(a.v8[0]),
- SIMD256T::template srli_epi32<ImmT>(a.v8[1]),
- };
-}
-
-template <int ImmT> // for each 128-bit lane:
-static SIMDINLINE Integer SIMDCALL srli_si(Integer const& a) // return a >> (ImmT*8) (uint)
-{
- return Integer{
- SIMD256T::template srli_si<ImmT>(a.v8[0]),
- SIMD256T::template srli_si<ImmT>(a.v8[1]),
- };
-}
-template <int ImmT>
-static SIMDINLINE Float SIMDCALL
- srlisi_ps(Float const& a) // same as srli_si, but with Float cast to int
-{
- return Float{
- SIMD256T::template srlisi_ps<ImmT>(a.v8[0]),
- SIMD256T::template srlisi_ps<ImmT>(a.v8[1]),
- };
-}
-
-SIMD_IWRAPPER_2(srlv_epi32); // return a >> b (uint32)
-
-//-----------------------------------------------------------------------
-// Conversion operations
-//-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL castpd_ps(Double const& a) // return *(Float*)(&a)
-{
- return Float{
- SIMD256T::castpd_ps(a.v8[0]),
- SIMD256T::castpd_ps(a.v8[1]),
- };
-}
-
-static SIMDINLINE Integer SIMDCALL castps_si(Float const& a) // return *(Integer*)(&a)
-{
- return Integer{
- SIMD256T::castps_si(a.v8[0]),
- SIMD256T::castps_si(a.v8[1]),
- };
-}
-
-static SIMDINLINE Double SIMDCALL castsi_pd(Integer const& a) // return *(Double*)(&a)
-{
- return Double{
- SIMD256T::castsi_pd(a.v8[0]),
- SIMD256T::castsi_pd(a.v8[1]),
- };
-}
-
-static SIMDINLINE Double SIMDCALL castps_pd(Float const& a) // return *(Double*)(&a)
-{
- return Double{
- SIMD256T::castps_pd(a.v8[0]),
- SIMD256T::castps_pd(a.v8[1]),
- };
-}
-
-static SIMDINLINE Float SIMDCALL castsi_ps(Integer const& a) // return *(Float*)(&a)
-{
- return Float{
- SIMD256T::castsi_ps(a.v8[0]),
- SIMD256T::castsi_ps(a.v8[1]),
- };
-}
-
-static SIMDINLINE Float SIMDCALL
- cvtepi32_ps(Integer const& a) // return (float)a (int32 --> float)
-{
- return Float{
- SIMD256T::cvtepi32_ps(a.v8[0]),
- SIMD256T::cvtepi32_ps(a.v8[1]),
- };
-}
-
-static SIMDINLINE Integer SIMDCALL
- cvtepu8_epi16(SIMD256Impl::Integer const& a) // return (int16)a (uint8 --> int16)
-{
- return Integer{
- SIMD256T::cvtepu8_epi16(a.v4[0]),
- SIMD256T::cvtepu8_epi16(a.v4[1]),
- };
-}
-
-static SIMDINLINE Integer SIMDCALL
- cvtepu8_epi32(SIMD256Impl::Integer const& a) // return (int32)a (uint8 --> int32)
-{
- return Integer{
- SIMD256T::cvtepu8_epi32(a.v4[0]),
- SIMD256T::cvtepu8_epi32(SIMD128T::template srli_si<8>(a.v4[0])),
- };
-}
-
-static SIMDINLINE Integer SIMDCALL
- cvtepu16_epi32(SIMD256Impl::Integer const& a) // return (int32)a (uint16 --> int32)
-{
- return Integer{
- SIMD256T::cvtepu16_epi32(a.v4[0]),
- SIMD256T::cvtepu16_epi32(a.v4[1]),
- };
-}
-
-static SIMDINLINE Integer SIMDCALL
- cvtepu16_epi64(SIMD256Impl::Integer const& a) // return (int64)a (uint16 --> int64)
-{
- return Integer{
- SIMD256T::cvtepu16_epi64(a.v4[0]),
- SIMD256T::cvtepu16_epi64(SIMD128T::template srli_si<8>(a.v4[0])),
- };
-}
-
-static SIMDINLINE Integer SIMDCALL
- cvtepu32_epi64(SIMD256Impl::Integer const& a) // return (int64)a (uint32 --> int64)
-{
- return Integer{
- SIMD256T::cvtepu32_epi64(a.v4[0]),
- SIMD256T::cvtepu32_epi64(a.v4[1]),
- };
-}
-
-static SIMDINLINE Integer SIMDCALL
- cvtps_epi32(Float const& a) // return (int32)a (float --> int32)
-{
- return Integer{
- SIMD256T::cvtps_epi32(a.v8[0]),
- SIMD256T::cvtps_epi32(a.v8[1]),
- };
-}
-
-static SIMDINLINE Integer SIMDCALL
- cvttps_epi32(Float const& a) // return (int32)a (rnd_to_zero(float) --> int32)
-{
- return Integer{
- SIMD256T::cvtps_epi32(a.v8[0]),
- SIMD256T::cvtps_epi32(a.v8[1]),
- };
-}
-
-//-----------------------------------------------------------------------
-// Comparison operations
-//-----------------------------------------------------------------------
-template <CompareType CmpTypeT>
-static SIMDINLINE Float SIMDCALL cmp_ps(Float const& a, Float const& b) // return a (CmpTypeT) b
-{
- return Float{
- SIMD256T::template cmp_ps<CmpTypeT>(a.v8[0], b.v8[0]),
- SIMD256T::template cmp_ps<CmpTypeT>(a.v8[1], b.v8[1]),
- };
-}
-static SIMDINLINE Float SIMDCALL cmplt_ps(Float const& a, Float const& b)
-{
- return cmp_ps<CompareType::LT_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpgt_ps(Float const& a, Float const& b)
-{
- return cmp_ps<CompareType::GT_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpneq_ps(Float const& a, Float const& b)
-{
- return cmp_ps<CompareType::NEQ_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpeq_ps(Float const& a, Float const& b)
-{
- return cmp_ps<CompareType::EQ_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmpge_ps(Float const& a, Float const& b)
-{
- return cmp_ps<CompareType::GE_OQ>(a, b);
-}
-static SIMDINLINE Float SIMDCALL cmple_ps(Float const& a, Float const& b)
-{
- return cmp_ps<CompareType::LE_OQ>(a, b);
-}
-
-template <CompareType CmpTypeT>
-static SIMDINLINE Mask SIMDCALL cmp_ps_mask(Float const& a, Float const& b)
-{
- return static_cast<Mask>(movemask_ps(cmp_ps<CmpTypeT>(a, b)));
-}
-
-SIMD_IWRAPPER_2(cmpeq_epi8); // return a == b (int8)
-SIMD_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
-SIMD_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
-SIMD_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
-SIMD_IWRAPPER_2(cmpgt_epi8); // return a > b (int8)
-SIMD_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
-SIMD_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
-SIMD_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
-SIMD_IWRAPPER_2(cmplt_epi32); // return a < b (int32)
-
-static SIMDINLINE bool SIMDCALL
- testz_ps(Float const& a, Float const& b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
-{
- return 0 != (SIMD256T::testz_ps(a.v8[0], b.v8[0]) & SIMD256T::testz_ps(a.v8[1], b.v8[1]));
-}
-
-static SIMDINLINE bool SIMDCALL
- testz_si(Integer const& a, Integer const& b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
-{
- return 0 != (SIMD256T::testz_si(a.v8[0], b.v8[0]) & SIMD256T::testz_si(a.v8[1], b.v8[1]));
-}
-
-//-----------------------------------------------------------------------
-// Blend / shuffle / permute operations
-//-----------------------------------------------------------------------
-SIMD_WRAPPER_2I(blend_ps); // return ImmT ? b : a (float)
-SIMD_IWRAPPER_2I(blend_epi32); // return ImmT ? b : a (int32)
-SIMD_WRAPPER_3(blendv_ps); // return mask ? b : a (float)
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const& a,
- Integer const& b,
- Float const& mask) // return mask ? b : a (int)
-{
- return Integer{
- SIMD256T::blendv_epi32(a.v8[0], b.v8[0], mask.v8[0]),
- SIMD256T::blendv_epi32(a.v8[1], b.v8[1], mask.v8[1]),
- };
-}
-
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const& a,
- Integer const& b,
- Integer const& mask) // return mask ? b : a (int)
-{
- return Integer{
- SIMD256T::blendv_epi32(a.v8[0], b.v8[0], mask.v8[0]),
- SIMD256T::blendv_epi32(a.v8[1], b.v8[1], mask.v8[1]),
- };
-}
-
-static SIMDINLINE Float SIMDCALL
- broadcast_ss(float const* p) // return *p (all elements in vector get same value)
-{
- float f = *p;
- return Float{
- SIMD256T::set1_ps(f),
- SIMD256T::set1_ps(f),
- };
-}
-
-template <int imm>
-static SIMDINLINE SIMD256Impl::Float SIMDCALL extract_ps(Float const& a)
-{
- SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
- return a.v8[imm];
-}
-
-template <int imm>
-static SIMDINLINE SIMD256Impl::Double SIMDCALL extract_pd(Double const& a)
-{
- SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
- return a.v8[imm];
-}
-
-template <int imm>
-static SIMDINLINE SIMD256Impl::Integer SIMDCALL extract_si(Integer const& a)
-{
- SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
- return a.v8[imm];
-}
-
-template <int imm>
-static SIMDINLINE Float SIMDCALL insert_ps(Float const& a, SIMD256Impl::Float const& b)
-{
- SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
- Float r = a;
- r.v8[imm] = b;
- return r;
-}
-
-template <int imm>
-static SIMDINLINE Double SIMDCALL insert_pd(Double const& a, SIMD256Impl::Double const& b)
-{
- SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
- Double r = a;
- r.v8[imm] = b;
- return r;
-}
-
-template <int imm>
-static SIMDINLINE Integer SIMDCALL insert_si(Integer const& a, SIMD256Impl::Integer const& b)
-{
- SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
- Integer r = a;
- r.v8[imm] = b;
- return r;
-}
-
-SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
-
-template <int ImmT>
-static SIMDINLINE Float SIMDCALL permute_ps(Float const& a)
-{
- return Float{
- SIMD256T::template permute_ps<ImmT>(a.v8[0]),
- SIMD256T::template permute_ps<ImmT>(a.v8[1]),
- };
-}
-
-static SIMDINLINE Integer SIMDCALL permute_epi32(
- Integer const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (int32)
-{
- return castps_si(permute_ps(castsi_ps(a), swiz));
-}
-
-static SIMDINLINE Float SIMDCALL
- permute_ps(Float const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (float)
-{
- const auto mask = SIMD256T::set1_epi32(7);
-
- auto lolo = SIMD256T::permute_ps(a.v8[0], SIMD256T::and_si(swiz.v8[0], mask));
- auto lohi = SIMD256T::permute_ps(a.v8[1], SIMD256T::and_si(swiz.v8[0], mask));
-
- auto hilo = SIMD256T::permute_ps(a.v8[0], SIMD256T::and_si(swiz.v8[1], mask));
- auto hihi = SIMD256T::permute_ps(a.v8[1], SIMD256T::and_si(swiz.v8[1], mask));
-
- return Float{
- SIMD256T::blendv_ps(
- lolo, lohi, SIMD256T::castsi_ps(SIMD256T::cmpgt_epi32(swiz.v8[0], mask))),
- SIMD256T::blendv_ps(
- hilo, hihi, SIMD256T::castsi_ps(SIMD256T::cmpgt_epi32(swiz.v8[1], mask))),
- };
-}
-
-// All of the 512-bit permute2f128_XX intrinsics do the following:
-//
-// SELECT4(src, control) {
-// CASE(control[1:0])
-// 0 : tmp[127:0] : = src[127:0]
-// 1 : tmp[127:0] : = src[255:128]
-// 2 : tmp[127:0] : = src[383:256]
-// 3 : tmp[127:0] : = src[511:384]
-// ESAC
-// RETURN tmp[127:0]
-// }
-//
-// dst[127:0] : = SELECT4(a[511:0], imm8[1:0])
-// dst[255:128] : = SELECT4(a[511:0], imm8[3:2])
-// dst[383:256] : = SELECT4(b[511:0], imm8[5:4])
-// dst[511:384] : = SELECT4(b[511:0], imm8[7:6])
-// dst[MAX:512] : = 0
-//
-// Since the 256-bit AVX instructions use a 4-bit control field (instead
-// of 2-bit for AVX512), we need to expand the control bits sent to the
-// AVX instructions for emulation.
-//
-template <int shuf>
-static SIMDINLINE Float SIMDCALL permute2f128_ps(Float const& a, Float const& b)
-{
- return Float{
- SIMD256T::template permute2f128_ps<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0],
- a.v8[1]),
- SIMD256T::template permute2f128_ps<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0],
- b.v8[1]),
- };
-}
-
-template <int shuf>
-static SIMDINLINE Double SIMDCALL permute2f128_pd(Double const& a, Double const& b)
-{
- return Double{
- SIMD256T::template permute2f128_pd<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0],
- a.v8[1]),
- SIMD256T::template permute2f128_pd<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0],
- b.v8[1]),
- };
-}
-
-template <int shuf>
-static SIMDINLINE Integer SIMDCALL permute2f128_si(Integer const& a, Integer const& b)
-{
- return Integer{
- SIMD256T::template permute2f128_si<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0],
- a.v8[1]),
- SIMD256T::template permute2f128_si<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0],
- b.v8[1]),
- };
-}
-
-SIMD_IWRAPPER_2I_1(shuffle_epi32);
-SIMD_IWRAPPER_2I_2(shuffle_epi64);
-SIMD_IWRAPPER_2(shuffle_epi8);
-SIMD_WRAPPER_2I_1(shuffle_pd);
-SIMD_WRAPPER_2I_1(shuffle_ps);
-SIMD_IWRAPPER_2(unpackhi_epi16);
-SIMD_IWRAPPER_2(unpackhi_epi32);
-SIMD_IWRAPPER_2(unpackhi_epi64);
-SIMD_IWRAPPER_2(unpackhi_epi8);
-SIMD_WRAPPER_2(unpackhi_pd);
-SIMD_WRAPPER_2(unpackhi_ps);
-SIMD_IWRAPPER_2(unpacklo_epi16);
-SIMD_IWRAPPER_2(unpacklo_epi32);
-SIMD_IWRAPPER_2(unpacklo_epi64);
-SIMD_IWRAPPER_2(unpacklo_epi8);
-SIMD_WRAPPER_2(unpacklo_pd);
-SIMD_WRAPPER_2(unpacklo_ps);
-
-//-----------------------------------------------------------------------
-// Load / store operations
-//-----------------------------------------------------------------------
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- i32gather_ps(float const* p, Integer const& idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
-{
- return Float{
- SIMD256T::template i32gather_ps<ScaleT>(p, idx.v8[0]),
- SIMD256T::template i32gather_ps<ScaleT>(p, idx.v8[1]),
- };
-}
-
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- sw_i32gather_ps(float const* p, Integer const& idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
-{
- return Float{
- SIMD256T::template sw_i32gather_ps<ScaleT>(p, idx.v8[0]),
- SIMD256T::template sw_i32gather_ps<ScaleT>(p, idx.v8[1]),
- };
-}
-
-static SIMDINLINE Float SIMDCALL
- load1_ps(float const* p) // return *p (broadcast 1 value to all elements)
-{
- return broadcast_ss(p);
-}
-
-static SIMDINLINE Float SIMDCALL
- load_ps(float const* p) // return *p (loads SIMD width elements from memory)
-{
- return Float{SIMD256T::load_ps(p), SIMD256T::load_ps(p + TARGET_SIMD_WIDTH)};
-}
-
-static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
-{
- return Integer{
- SIMD256T::load_si(&p->v8[0]),
- SIMD256T::load_si(&p->v8[1]),
- };
-}
-
-static SIMDINLINE Float SIMDCALL
- loadu_ps(float const* p) // return *p (same as load_ps but allows for unaligned mem)
-{
- return Float{SIMD256T::loadu_ps(p), SIMD256T::loadu_ps(p + TARGET_SIMD_WIDTH)};
-}
-
-static SIMDINLINE Integer SIMDCALL
- loadu_si(Integer const* p) // return *p (same as load_si but allows for unaligned mem)
-{
- return Integer{
- SIMD256T::loadu_si(&p->v8[0]),
- SIMD256T::loadu_si(&p->v8[1]),
- };
-}
-
-// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- mask_i32gather_ps(Float const& old, float const* p, Integer const& idx, Float const& mask)
-{
- return Float{
- SIMD256T::template mask_i32gather_ps<ScaleT>(old.v8[0], p, idx.v8[0], mask.v8[0]),
- SIMD256T::template mask_i32gather_ps<ScaleT>(old.v8[1], p, idx.v8[1], mask.v8[1]),
- };
-}
-
-template <ScaleFactor ScaleT = ScaleFactor::SF_1>
-static SIMDINLINE Float SIMDCALL
- sw_mask_i32gather_ps(Float const& old, float const* p, Integer const& idx, Float const& mask)
-{
- return Float{
- SIMD256T::template sw_mask_i32gather_ps<ScaleT>(old.v8[0], p, idx.v8[0], mask.v8[0]),
- SIMD256T::template sw_mask_i32gather_ps<ScaleT>(old.v8[1], p, idx.v8[1], mask.v8[1]),
- };
-}
-
-static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer const& mask, Float const& src)
-{
- SIMD256T::maskstore_ps(p, mask.v8[0], src.v8[0]);
- SIMD256T::maskstore_ps(p + TARGET_SIMD_WIDTH, mask.v8[1], src.v8[1]);
-}
-
-static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer const& a)
-{
- uint64_t mask = static_cast<uint64_t>(SIMD256T::movemask_epi8(a.v8[0]));
- mask |= static_cast<uint64_t>(SIMD256T::movemask_epi8(a.v8[1])) << (TARGET_SIMD_WIDTH * 4);
-
- return mask;
-}
-
-static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double const& a)
-{
- uint32_t mask = static_cast<uint32_t>(SIMD256T::movemask_pd(a.v8[0]));
- mask |= static_cast<uint32_t>(SIMD256T::movemask_pd(a.v8[1])) << (TARGET_SIMD_WIDTH / 2);
-
- return mask;
-}
-static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float const& a)
-{
- uint32_t mask = static_cast<uint32_t>(SIMD256T::movemask_ps(a.v8[0]));
- mask |= static_cast<uint32_t>(SIMD256T::movemask_ps(a.v8[1])) << TARGET_SIMD_WIDTH;
-
- return mask;
-}
-
-static SIMDINLINE Integer SIMDCALL set1_epi32(int i) // return i (all elements are same value)
-{
- return Integer{SIMD256T::set1_epi32(i), SIMD256T::set1_epi32(i)};
-}
-
-static SIMDINLINE Integer SIMDCALL set1_epi8(char i) // return i (all elements are same value)
-{
- return Integer{SIMD256T::set1_epi8(i), SIMD256T::set1_epi8(i)};
-}
-
-static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
-{
- return Float{SIMD256T::set1_ps(f), SIMD256T::set1_ps(f)};
-}
-
-static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
-{
- return Float{SIMD256T::setzero_ps(), SIMD256T::setzero_ps()};
-}
-
-static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
-{
- return Integer{SIMD256T::setzero_si(), SIMD256T::setzero_si()};
-}
-
-static SIMDINLINE void SIMDCALL
- store_ps(float* p, Float const& a) // *p = a (stores all elements contiguously in memory)
-{
- SIMD256T::store_ps(p, a.v8[0]);
- SIMD256T::store_ps(p + TARGET_SIMD_WIDTH, a.v8[1]);
-}
-
-static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer const& a) // *p = a
-{
- SIMD256T::store_si(&p->v8[0], a.v8[0]);
- SIMD256T::store_si(&p->v8[1], a.v8[1]);
-}
-
-static SIMDINLINE void SIMDCALL
- stream_ps(float* p, Float const& a) // *p = a (same as store_ps, but doesn't keep memory in cache)
-{
- SIMD256T::stream_ps(p, a.v8[0]);
- SIMD256T::stream_ps(p + TARGET_SIMD_WIDTH, a.v8[1]);
-}
-
-static SIMDINLINE Integer SIMDCALL set_epi32(int i15,
- int i14,
- int i13,
- int i12,
- int i11,
- int i10,
- int i9,
- int i8,
- int i7,
- int i6,
- int i5,
- int i4,
- int i3,
- int i2,
- int i1,
- int i0)
-{
- return Integer{SIMD256T::set_epi32(i7, i6, i5, i4, i3, i2, i1, i0),
- SIMD256T::set_epi32(i15, i14, i13, i12, i11, i10, i9, i8)};
-}
-
-static SIMDINLINE Integer SIMDCALL
- set_epi32(int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
-{
- return set_epi32(0, 0, 0, 0, 0, 0, 0, 0, i7, i6, i5, i4, i3, i2, i1, i0);
-}
-
-static SIMDINLINE Float SIMDCALL set_ps(float i15,
- float i14,
- float i13,
- float i12,
- float i11,
- float i10,
- float i9,
- float i8,
- float i7,
- float i6,
- float i5,
- float i4,
- float i3,
- float i2,
- float i1,
- float i0)
-{
- return Float{SIMD256T::set_ps(i7, i6, i5, i4, i3, i2, i1, i0),
- SIMD256T::set_ps(i15, i14, i13, i12, i11, i10, i9, i8)};
-}
-
-static SIMDINLINE Float SIMDCALL
- set_ps(float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
-{
- return set_ps(0, 0, 0, 0, 0, 0, 0, 0, i7, i6, i5, i4, i3, i2, i1, i0);
-}
-
-static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
-{
- return Float{SIMD256T::vmask_ps(mask), SIMD256T::vmask_ps(mask >> TARGET_SIMD_WIDTH)};
-}
-
-#undef SIMD_WRAPPER_1
-#undef SIMD_WRAPPER_2
-#undef SIMD_WRAPPER_2I
-#undef SIMD_WRAPPER_2I_1
-#undef SIMD_WRAPPER_3
-#undef SIMD_IWRAPPER_1
-#undef SIMD_IWRAPPER_2
-#undef SIMD_IWRAPPER_2I
-#undef SIMD_IWRAPPER_2I_1
-#undef SIMD_IWRAPPER_3
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu_masks.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu_masks.inl
deleted file mode 100644
index 473934824ee..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu_masks.inl
+++ /dev/null
@@ -1,27 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#if !defined(__SIMD_LIB_AVX_HPP__)
-#error Do not include this file directly, use "simdlib.hpp" instead.
-#endif
-
-// no backwards compatibility for simd mask-enabled functions
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_interface.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib_interface.hpp
deleted file mode 100644
index 3d31b39ee55..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_interface.hpp
+++ /dev/null
@@ -1,332 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#pragma once
-#if 0
-//===========================================================================
-// Placeholder name representing either SIMD4, SIMD256, or SIMD16 structures.
-//===========================================================================
-struct SIMD256 // or SIMD4 or SIMD16
-{
- //=======================================================================
- // SIMD Types
- //
- // These typedefs are examples. The SIMD256 and SIMD16 implementations will
- // use different base types with this same naming.
- using Float = __m256; // Packed single-precision float vector
- using Double = __m256d; // Packed double-precision float vector
- using Integer = __m256i; // Packed integer vector (mutable element widths)
- using Mask = uint8_t; // Integer representing mask bits
-
- //=======================================================================
- // Standard interface
- // (available in both SIMD256 and SIMD16 widths)
- //=======================================================================
-
- //-----------------------------------------------------------------------
- // Single precision floating point arithmetic operations
- //-----------------------------------------------------------------------
- static Float add_ps(Float a, Float b); // return a + b
- static Float div_ps(Float a, Float b); // return a / b
- static Float fmadd_ps(Float a, Float b, Float c); // return (a * b) + c
- static Float fmsub_ps(Float a, Float b, Float c); // return (a * b) - c
- static Float max_ps(Float a, Float b); // return (a > b) ? a : b
- static Float min_ps(Float a, Float b); // return (a < b) ? a : b
- static Float mul_ps(Float a, Float b); // return a * b
- static Float rcp_ps(Float a); // return 1.0f / a
- static Float rsqrt_ps(Float a); // return 1.0f / sqrt(a)
- static Float sub_ps(Float a, Float b); // return a - b
-
- enum class RoundMode
- {
- TO_NEAREST_INT = 0x00, // Round to nearest integer == TRUNCATE(value + (signof(value))0.5)
- TO_NEG_INF = 0x01, // Round to negative infinity
- TO_POS_INF = 0x02, // Round to positive infinity
- TO_ZERO = 0x03, // Round to 0 a.k.a. truncate
- CUR_DIRECTION = 0x04, // Round in direction set in MXCSR register
-
- RAISE_EXC = 0x00, // Raise exception on overflow
- NO_EXC = 0x08, // Suppress exceptions
-
- NINT = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(RAISE_EXC),
- NINT_NOEXC = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(NO_EXC),
- FLOOR = static_cast<int>(TO_NEG_INF) | static_cast<int>(RAISE_EXC),
- FLOOR_NOEXC = static_cast<int>(TO_NEG_INF) | static_cast<int>(NO_EXC),
- CEIL = static_cast<int>(TO_POS_INF) | static_cast<int>(RAISE_EXC),
- CEIL_NOEXC = static_cast<int>(TO_POS_INF) | static_cast<int>(NO_EXC),
- TRUNC = static_cast<int>(TO_ZERO) | static_cast<int>(RAISE_EXC),
- TRUNC_NOEXC = static_cast<int>(TO_ZERO) | static_cast<int>(NO_EXC),
- RINT = static_cast<int>(CUR_DIRECTION) | static_cast<int>(RAISE_EXC),
- NEARBYINT = static_cast<int>(CUR_DIRECTION) | static_cast<int>(NO_EXC),
- };
-
- // return round_func(a)
- //
- // round_func is chosen on the RMT template parameter. See the documentation
- // for the RoundMode enumeration above.
- template <RoundMode RMT>
- static Float round_ps(Float a); // return round(a)
-
-
- //-----------------------------------------------------------------------
- // Integer (various width) arithmetic operations
- //-----------------------------------------------------------------------
- static Integer abs_epi32(Integer a); // return absolute_value(a) (int32)
- static Integer add_epi32(Integer a, Integer b); // return a + b (int32)
- static Integer add_epi8(Integer a, Integer b); // return a + b (int8)
- static Integer adds_epu8(Integer a, Integer b); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
- static Integer max_epi32(Integer a, Integer b); // return (a > b) ? a : b (int32)
- static Integer max_epu32(Integer a, Integer b); // return (a > b) ? a : b (uint32)
- static Integer min_epi32(Integer a, Integer b); // return (a < b) ? a : b (int32)
- static Integer min_epu32(Integer a, Integer b); // return (a < b) ? a : b (uint32)
- static Integer mul_epi32(Integer a, Integer b); // return a * b (int32)
-
- // return (a * b) & 0xFFFFFFFF
- //
- // Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
- // and store the low 32 bits of the intermediate integers in dst.
- static Float mullo_epi32(Integer a, Integer b);
-
- static Integer sub_epi32(Integer a, Integer b); // return a - b (int32)
- static Integer sub_epi64(Integer a, Integer b); // return a - b (int64)
- static Integer subs_epu8(Integer a, Integer b); // return (b > a) ? 0 : (a - b) (uint8)
-
- //-----------------------------------------------------------------------
- // Logical operations
- //-----------------------------------------------------------------------
- static Float and_ps(Float a, Float b); // return a & b (float treated as int)
- static Integer and_si(Integer a, Integer b); // return a & b (int)
- static Float andnot_ps(Float a, Float b); // return (~a) & b (float treated as int)
- static Integer andnot_si(Integer a, Integer b); // return (~a) & b (int)
- static Float or_ps(Float a, Float b); // return a | b (float treated as int)
- static Float or_si(Integer a, Integer b); // return a | b (int)
- static Float xor_ps(Float a, Float b); // return a ^ b (float treated as int)
- static Integer xor_si(Integer a, Integer b); // return a ^ b (int)
-
- //-----------------------------------------------------------------------
- // Shift operations
- //-----------------------------------------------------------------------
- template<int ImmT>
- static Integer slli_epi32(Integer a); // return a << ImmT
- static Integer sllv_epi32(Integer a, Integer b); // return a << b
- template<int ImmT>
- static Integer srai_epi32(Integer a); // return a >> ImmT (int32)
- template<int ImmT>
- static Integer srli_epi32(Integer a); // return a >> ImmT (uint32)
- template<int ImmT> // for each 128-bit lane:
- static Integer srli_si(Integer a); // return a >> (ImmT*8) (uint)
- template<int ImmT>
- static Float srlisi_ps(Float a); // same as srli_si, but with Float cast to int
- static Integer srlv_epi32(Integer a, Integer b); // return a >> b (uint32)
-
- //-----------------------------------------------------------------------
- // Conversion operations
- //-----------------------------------------------------------------------
- static Float castpd_ps(Double a); // return *(Float*)(&a)
- static Integer castps_si(Float a); // return *(Integer*)(&a)
- static Double castsi_pd(Integer a); // return *(Double*)(&a)
- static Double castps_pd(Float a); // return *(Double*)(&a)
- static Float castsi_ps(Integer a); // return *(Float*)(&a)
- static Float cvtepi32_ps(Integer a); // return (float)a (int32 --> float)
- static Integer cvtepu8_epi16(Integer a); // return (int16)a (uint8 --> int16)
- static Integer cvtepu8_epi32(Integer a); // return (int32)a (uint8 --> int32)
- static Integer cvtepu16_epi32(Integer a); // return (int32)a (uint16 --> int32)
- static Integer cvtepu16_epi64(Integer a); // return (int64)a (uint16 --> int64)
- static Integer cvtepu32_epi64(Integer a); // return (int64)a (uint32 --> int64)
- static Integer cvtps_epi32(Float a); // return (int32)a (float --> int32)
- static Integer cvttps_epi32(Float a); // return (int32)a (rnd_to_zero(float) --> int32)
-
- //-----------------------------------------------------------------------
- // Comparison operations
- //-----------------------------------------------------------------------
-
- // Comparison types used with cmp_ps:
- // - ordered comparisons are always false if either operand is NaN
- // - unordered comparisons are always true if either operand is NaN
- // - signaling comparisons raise an exception if either operand is NaN
- // - non-signaling comparisons will never raise an exception
- //
- // Ordered: return (a != NaN) && (b != NaN) && (a cmp b)
- // Unordered: return (a == NaN) || (b == NaN) || (a cmp b)
- enum class CompareType
- {
- EQ_OQ = 0x00, // Equal (ordered, nonsignaling)
- LT_OS = 0x01, // Less-than (ordered, signaling)
- LE_OS = 0x02, // Less-than-or-equal (ordered, signaling)
- UNORD_Q = 0x03, // Unordered (nonsignaling)
- NEQ_UQ = 0x04, // Not-equal (unordered, nonsignaling)
- NLT_US = 0x05, // Not-less-than (unordered, signaling)
- NLE_US = 0x06, // Not-less-than-or-equal (unordered, signaling)
- ORD_Q = 0x07, // Ordered (nonsignaling)
- EQ_UQ = 0x08, // Equal (unordered, non-signaling)
- NGE_US = 0x09, // Not-greater-than-or-equal (unordered, signaling)
- NGT_US = 0x0A, // Not-greater-than (unordered, signaling)
- FALSE_OQ = 0x0B, // False (ordered, nonsignaling)
- NEQ_OQ = 0x0C, // Not-equal (ordered, non-signaling)
- GE_OS = 0x0D, // Greater-than-or-equal (ordered, signaling)
- GT_OS = 0x0E, // Greater-than (ordered, signaling)
- TRUE_UQ = 0x0F, // True (unordered, non-signaling)
- EQ_OS = 0x10, // Equal (ordered, signaling)
- LT_OQ = 0x11, // Less-than (ordered, nonsignaling)
- LE_OQ = 0x12, // Less-than-or-equal (ordered, nonsignaling)
- UNORD_S = 0x13, // Unordered (signaling)
- NEQ_US = 0x14, // Not-equal (unordered, signaling)
- NLT_UQ = 0x15, // Not-less-than (unordered, nonsignaling)
- NLE_UQ = 0x16, // Not-less-than-or-equal (unordered, nonsignaling)
- ORD_S = 0x17, // Ordered (signaling)
- EQ_US = 0x18, // Equal (unordered, signaling)
- NGE_UQ = 0x19, // Not-greater-than-or-equal (unordered, nonsignaling)
- NGT_UQ = 0x1A, // Not-greater-than (unordered, nonsignaling)
- FALSE_OS = 0x1B, // False (ordered, signaling)
- NEQ_OS = 0x1C, // Not-equal (ordered, signaling)
- GE_OQ = 0x1D, // Greater-than-or-equal (ordered, nonsignaling)
- GT_OQ = 0x1E, // Greater-than (ordered, nonsignaling)
- TRUE_US = 0x1F, // True (unordered, signaling)
- };
-
- // return a (CmpTypeT) b (float)
- //
- // See documentation for CompareType above for valid values for CmpTypeT.
- template<CompareType CmpTypeT>
- static Float cmp_ps(Float a, Float b); // return a (CmtTypeT) b (see above)
- static Float cmpgt_ps(Float a, Float b); // return cmp_ps<CompareType::GT_OQ>(a, b)
- static Float cmple_ps(Float a, Float b); // return cmp_ps<CompareType::LE_OQ>(a, b)
- static Float cmplt_ps(Float a, Float b); // return cmp_ps<CompareType::LT_OQ>(a, b)
- static Float cmpneq_ps(Float a, Float b); // return cmp_ps<CompareType::NEQ_OQ>(a, b)
- static Float cmpeq_ps(Float a, Float b); // return cmp_ps<CompareType::EQ_OQ>(a, b)
- static Float cmpge_ps(Float a, Float b); // return cmp_ps<CompareType::GE_OQ>(a, b)
- static Integer cmpeq_epi8(Integer a, Integer b); // return a == b (int8)
- static Integer cmpeq_epi16(Integer a, Integer b); // return a == b (int16)
- static Integer cmpeq_epi32(Integer a, Integer b); // return a == b (int32)
- static Integer cmpeq_epi64(Integer a, Integer b); // return a == b (int64)
- static Integer cmpgt_epi8(Integer a, Integer b); // return a > b (int8)
- static Integer cmpgt_epi16(Integer a, Integer b); // return a > b (int16)
- static Integer cmpgt_epi32(Integer a, Integer b); // return a > b (int32)
- static Integer cmpgt_epi64(Integer a, Integer b); // return a > b (int64)
- static Integer cmplt_epi32(Integer a, Integer b); // return a < b (int32)
- static bool testz_ps(Float a, Float b); // return all_lanes_zero(a & b) ? 1 : 0 (float)
- static bool testz_si(Integer a, Integer b); // return all_lanes_zero(a & b) ? 1 : 0 (int)
-
- //-----------------------------------------------------------------------
- // Blend / shuffle / permute operations
- //-----------------------------------------------------------------------
- template<int ImmT>
- static Float blend_ps(Float a, Float b); // return ImmT ? b : a (float)
- static Integer blendv_epi32(Integer a, Integer b, Float mask); // return mask ? b : a (int)
- static Float blendv_ps(Float a, Float b, Float mask); // return mask ? b : a (float)
- static Float broadcast_ss(float const *p); // return *p (all elements in vector get same value)
- static Integer packs_epi16(Integer a, Integer b); // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
- static Integer packs_epi32(Integer a, Integer b); // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
- static Integer packus_epi16(Integer a, Integer b); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
- static Integer packus_epi32(Integer a, Integer b); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
- static Float permute_epi32(Integer a, Integer swiz); // return a[swiz[i]] for each 32-bit lane i (int32)
- static Float permute_ps(Float a, Integer swiz); // return a[swiz[i]] for each 32-bit lane i (float)
- template<int SwizT>
- static Integer shuffle_epi32(Integer a, Integer b);
- template<int SwizT>
- static Integer shuffle_epi64(Integer a, Integer b);
- static Integer shuffle_epi8(Integer a, Integer b);
- template<int SwizT>
- static Float shuffle_pd(Double a, Double b);
- template<int SwizT>
- static Float shuffle_ps(Float a, Float b);
- static Integer unpackhi_epi16(Integer a, Integer b);
- static Integer unpackhi_epi32(Integer a, Integer b);
- static Integer unpackhi_epi64(Integer a, Integer b);
- static Integer unpackhi_epi8(Integer a, Integer b);
- static Float unpackhi_pd(Double a, Double b);
- static Float unpackhi_ps(Float a, Float b);
- static Integer unpacklo_epi16(Integer a, Integer b);
- static Integer unpacklo_epi32(Integer a, Integer b);
- static Integer unpacklo_epi64(Integer a, Integer b);
- static Integer unpacklo_epi8(Integer a, Integer b);
- static Float unpacklo_pd(Double a, Double b);
- static Float unpacklo_ps(Float a, Float b);
-
- //-----------------------------------------------------------------------
- // Load / store operations
- //-----------------------------------------------------------------------
- enum class ScaleFactor
- {
- SF_1, // No scaling
- SF_2, // Scale offset by 2
- SF_4, // Scale offset by 4
- SF_8, // Scale offset by 8
- };
-
- template<ScaleFactor ScaleT = ScaleFactor::SF_1>
- static Float i32gather_ps(float const* p, Integer idx); // return *(float*)(((int8*)p) + (idx * ScaleT))
- static Float load1_ps(float const *p); // return *p (broadcast 1 value to all elements)
- static Float load_ps(float const *p); // return *p (loads SIMD width elements from memory)
- static Integer load_si(Integer const *p); // return *p
- static Float loadu_ps(float const *p); // return *p (same as load_ps but allows for unaligned mem)
- static Integer loadu_si(Integer const *p); // return *p (same as load_si but allows for unaligned mem)
-
- // for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
- template<int ScaleT>
- static Float mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask);
-
- static void maskstore_ps(float *p, Integer mask, Float src);
- static int movemask_epi8(Integer a);
- static int movemask_pd(Double a);
- static int movemask_ps(Float a);
- static Integer set1_epi32(int i); // return i (all elements are same value)
- static Integer set1_epi8(char i); // return i (all elements are same value)
- static Float set1_ps(float f); // return f (all elements are same value)
- static Float setzero_ps(); // return 0 (float)
- static Integer setzero_si(); // return 0 (integer)
- static void store_ps(float *p, Float a); // *p = a (stores all elements contiguously in memory)
- static void store_si(Integer *p, Integer a); // *p = a
- static void stream_ps(float *p, Float a); // *p = a (same as store_ps, but doesn't keep memory in cache)
-
- //=======================================================================
- // Legacy interface (available only in SIMD256 width)
- //=======================================================================
-
- static Float broadcast_ps(__m128 const *p);
- template<int ImmT>
- static __m128d extractf128_pd(Double a);
- template<int ImmT>
- static __m128 extractf128_ps(Float a);
- template<int ImmT>
- static __m128i extractf128_si(Integer a);
- template<int ImmT>
- static Double insertf128_pd(Double a, __m128d b);
- template<int ImmT>
- static Float insertf128_ps(Float a, __m128 b);
- template<int ImmT>
- static Integer insertf128_si(Integer a, __m128i b);
- static Integer loadu2_si(__m128 const* phi, __m128 const* plo);
- template<int ImmT>
- static Double permute2f128_pd(Double a, Double b);
- template<int ImmT>
- static Float permute2f128_ps(Float a, Float b);
- template<int ImmT>
- static Integer permute2f128_si(Integer a, Integer b);
- static Integer set_epi32(int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0);
- static void storeu2_si(__m128i *phi, __m128i *plo, Integer src);
-
- //=======================================================================
- // Advanced masking interface (currently available only in SIMD16 width)
- //=======================================================================
-};
-#endif // #if 0
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp
deleted file mode 100644
index 3ef847d4ca4..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp
+++ /dev/null
@@ -1,457 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-#pragma once
-
-#if !defined(__cplusplus)
-#error C++ compilation required
-#endif
-
-#include <immintrin.h>
-#include <inttypes.h>
-#include <stdint.h>
-
-#define SIMD_ARCH_AVX 0
-#define SIMD_ARCH_AVX2 1
-#define SIMD_ARCH_AVX512 2
-
-#if !defined(SIMD_ARCH)
-#define SIMD_ARCH SIMD_ARCH_AVX
-#endif
-
-#if defined(_MSC_VER)
-#define SIMDCALL __vectorcall
-#define SIMDINLINE __forceinline
-#define SIMDALIGN(type_, align_) __declspec(align(align_)) type_
-#else
-#define SIMDCALL
-#define SIMDINLINE inline
-#define SIMDALIGN(type_, align_) type_ __attribute__((aligned(align_)))
-#endif
-
-// For documentation, please see the following include...
-// #include "simdlib_interface.hpp"
-
-namespace SIMDImpl
-{
- enum class CompareType
- {
- EQ_OQ = 0x00, // Equal (ordered, nonsignaling)
- LT_OS = 0x01, // Less-than (ordered, signaling)
- LE_OS = 0x02, // Less-than-or-equal (ordered, signaling)
- UNORD_Q = 0x03, // Unordered (nonsignaling)
- NEQ_UQ = 0x04, // Not-equal (unordered, nonsignaling)
- NLT_US = 0x05, // Not-less-than (unordered, signaling)
- NLE_US = 0x06, // Not-less-than-or-equal (unordered, signaling)
- ORD_Q = 0x07, // Ordered (nonsignaling)
- EQ_UQ = 0x08, // Equal (unordered, non-signaling)
- NGE_US = 0x09, // Not-greater-than-or-equal (unordered, signaling)
- NGT_US = 0x0A, // Not-greater-than (unordered, signaling)
- FALSE_OQ = 0x0B, // False (ordered, nonsignaling)
- NEQ_OQ = 0x0C, // Not-equal (ordered, non-signaling)
- GE_OS = 0x0D, // Greater-than-or-equal (ordered, signaling)
- GT_OS = 0x0E, // Greater-than (ordered, signaling)
- TRUE_UQ = 0x0F, // True (unordered, non-signaling)
- EQ_OS = 0x10, // Equal (ordered, signaling)
- LT_OQ = 0x11, // Less-than (ordered, nonsignaling)
- LE_OQ = 0x12, // Less-than-or-equal (ordered, nonsignaling)
- UNORD_S = 0x13, // Unordered (signaling)
- NEQ_US = 0x14, // Not-equal (unordered, signaling)
- NLT_UQ = 0x15, // Not-less-than (unordered, nonsignaling)
- NLE_UQ = 0x16, // Not-less-than-or-equal (unordered, nonsignaling)
- ORD_S = 0x17, // Ordered (signaling)
- EQ_US = 0x18, // Equal (unordered, signaling)
- NGE_UQ = 0x19, // Not-greater-than-or-equal (unordered, nonsignaling)
- NGT_UQ = 0x1A, // Not-greater-than (unordered, nonsignaling)
- FALSE_OS = 0x1B, // False (ordered, signaling)
- NEQ_OS = 0x1C, // Not-equal (ordered, signaling)
- GE_OQ = 0x1D, // Greater-than-or-equal (ordered, nonsignaling)
- GT_OQ = 0x1E, // Greater-than (ordered, nonsignaling)
- TRUE_US = 0x1F, // True (unordered, signaling)
- };
-
-#if SIMD_ARCH >= SIMD_ARCH_AVX512
- enum class CompareTypeInt
- {
- EQ = _MM_CMPINT_EQ, // Equal
- LT = _MM_CMPINT_LT, // Less than
- LE = _MM_CMPINT_LE, // Less than or Equal
- NE = _MM_CMPINT_NE, // Not Equal
- GE = _MM_CMPINT_GE, // Greater than or Equal
- GT = _MM_CMPINT_GT, // Greater than
- };
-#endif // SIMD_ARCH >= SIMD_ARCH_AVX512
-
- enum class ScaleFactor
- {
- SF_1 = 1, // No scaling
- SF_2 = 2, // Scale offset by 2
- SF_4 = 4, // Scale offset by 4
- SF_8 = 8, // Scale offset by 8
- };
-
- enum class RoundMode
- {
- TO_NEAREST_INT = 0x00, // Round to nearest integer == TRUNCATE(value + 0.5)
- TO_NEG_INF = 0x01, // Round to negative infinity
- TO_POS_INF = 0x02, // Round to positive infinity
- TO_ZERO = 0x03, // Round to 0 a.k.a. truncate
- CUR_DIRECTION = 0x04, // Round in direction set in MXCSR register
-
- RAISE_EXC = 0x00, // Raise exception on overflow
- NO_EXC = 0x08, // Suppress exceptions
-
- NINT = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(RAISE_EXC),
- NINT_NOEXC = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(NO_EXC),
- FLOOR = static_cast<int>(TO_NEG_INF) | static_cast<int>(RAISE_EXC),
- FLOOR_NOEXC = static_cast<int>(TO_NEG_INF) | static_cast<int>(NO_EXC),
- CEIL = static_cast<int>(TO_POS_INF) | static_cast<int>(RAISE_EXC),
- CEIL_NOEXC = static_cast<int>(TO_POS_INF) | static_cast<int>(NO_EXC),
- TRUNC = static_cast<int>(TO_ZERO) | static_cast<int>(RAISE_EXC),
- TRUNC_NOEXC = static_cast<int>(TO_ZERO) | static_cast<int>(NO_EXC),
- RINT = static_cast<int>(CUR_DIRECTION) | static_cast<int>(RAISE_EXC),
- NEARBYINT = static_cast<int>(CUR_DIRECTION) | static_cast<int>(NO_EXC),
- };
-
- struct Traits
- {
- using CompareType = SIMDImpl::CompareType;
- using ScaleFactor = SIMDImpl::ScaleFactor;
- using RoundMode = SIMDImpl::RoundMode;
- };
-
- // Attribute, 4-dimensional attribute in SIMD SOA layout
- template <typename Float, typename Integer, typename Double>
- union Vec4
- {
- Float v[4];
- Integer vi[4];
- Double vd[4];
- struct
- {
- Float x;
- Float y;
- Float z;
- Float w;
- };
- SIMDINLINE Float& SIMDCALL operator[](const int i) { return v[i]; }
- SIMDINLINE Float const& SIMDCALL operator[](const int i) const { return v[i]; }
- SIMDINLINE Vec4& SIMDCALL operator=(Vec4 const& in)
- {
- v[0] = in.v[0];
- v[1] = in.v[1];
- v[2] = in.v[2];
- v[3] = in.v[3];
- return *this;
- }
- };
-
- namespace SIMD128Impl
- {
- union Float
- {
- SIMDINLINE Float() = default;
- SIMDINLINE Float(__m128 in) : v(in) {}
- SIMDINLINE Float& SIMDCALL operator=(__m128 in)
- {
- v = in;
- return *this;
- }
- SIMDINLINE Float& SIMDCALL operator=(Float const& in)
- {
- v = in.v;
- return *this;
- }
- SIMDINLINE SIMDCALL operator __m128() const { return v; }
-
- SIMDALIGN(__m128, 16) v;
- };
-
- union Integer
- {
- SIMDINLINE Integer() = default;
- SIMDINLINE Integer(__m128i in) : v(in) {}
- SIMDINLINE Integer& SIMDCALL operator=(__m128i in)
- {
- v = in;
- return *this;
- }
- SIMDINLINE Integer& SIMDCALL operator=(Integer const& in)
- {
- v = in.v;
- return *this;
- }
- SIMDINLINE SIMDCALL operator __m128i() const { return v; }
-
- SIMDALIGN(__m128i, 16) v;
- };
-
- union Double
- {
- SIMDINLINE Double() = default;
- SIMDINLINE Double(__m128d in) : v(in) {}
- SIMDINLINE Double& SIMDCALL operator=(__m128d in)
- {
- v = in;
- return *this;
- }
- SIMDINLINE Double& SIMDCALL operator=(Double const& in)
- {
- v = in.v;
- return *this;
- }
- SIMDINLINE SIMDCALL operator __m128d() const { return v; }
-
- SIMDALIGN(__m128d, 16) v;
- };
-
- using Vec4 = SIMDImpl::Vec4<Float, Integer, Double>;
- using Mask = uint8_t;
-
- static const uint32_t SIMD_WIDTH = 4;
- } // namespace SIMD128Impl
-
- namespace SIMD256Impl
- {
- union Float
- {
- SIMDINLINE Float() = default;
- SIMDINLINE Float(__m256 in) : v(in) {}
- SIMDINLINE Float(SIMD128Impl::Float const& in_lo,
- SIMD128Impl::Float const& in_hi = _mm_setzero_ps())
- {
- v = _mm256_insertf128_ps(_mm256_castps128_ps256(in_lo), in_hi, 0x1);
- }
- SIMDINLINE Float& SIMDCALL operator=(__m256 in)
- {
- v = in;
- return *this;
- }
- SIMDINLINE Float& SIMDCALL operator=(Float const& in)
- {
- v = in.v;
- return *this;
- }
- SIMDINLINE SIMDCALL operator __m256() const { return v; }
-
- SIMDALIGN(__m256, 32) v;
- SIMD128Impl::Float v4[2];
- };
-
- union Integer
- {
- SIMDINLINE Integer() = default;
- SIMDINLINE Integer(__m256i in) : v(in) {}
- SIMDINLINE Integer(SIMD128Impl::Integer const& in_lo,
- SIMD128Impl::Integer const& in_hi = _mm_setzero_si128())
- {
- v = _mm256_insertf128_si256(_mm256_castsi128_si256(in_lo), in_hi, 0x1);
- }
- SIMDINLINE Integer& SIMDCALL operator=(__m256i in)
- {
- v = in;
- return *this;
- }
- SIMDINLINE Integer& SIMDCALL operator=(Integer const& in)
- {
- v = in.v;
- return *this;
- }
- SIMDINLINE SIMDCALL operator __m256i() const { return v; }
-
- SIMDALIGN(__m256i, 32) v;
- SIMD128Impl::Integer v4[2];
- };
-
- union Double
- {
- SIMDINLINE Double() = default;
- SIMDINLINE Double(__m256d const& in) : v(in) {}
- SIMDINLINE Double(SIMD128Impl::Double const& in_lo,
- SIMD128Impl::Double const& in_hi = _mm_setzero_pd())
- {
- v = _mm256_insertf128_pd(_mm256_castpd128_pd256(in_lo), in_hi, 0x1);
- }
- SIMDINLINE Double& SIMDCALL operator=(__m256d in)
- {
- v = in;
- return *this;
- }
- SIMDINLINE Double& SIMDCALL operator=(Double const& in)
- {
- v = in.v;
- return *this;
- }
- SIMDINLINE SIMDCALL operator __m256d() const { return v; }
-
- SIMDALIGN(__m256d, 32) v;
- SIMD128Impl::Double v4[2];
- };
-
- using Vec4 = SIMDImpl::Vec4<Float, Integer, Double>;
- using Mask = uint8_t;
-
- static const uint32_t SIMD_WIDTH = 8;
- } // namespace SIMD256Impl
-
- namespace SIMD512Impl
- {
-#if !(defined(__AVX512F__) || defined(_ZMMINTRIN_H_INCLUDED))
- // Define AVX512 types if not included via immintrin.h.
- // All data members of these types are ONLY to be viewed
- // in a debugger. Do NOT access them via code!
- union __m512
- {
- private:
- float m512_f32[16];
- };
- struct __m512d
- {
- private:
- double m512d_f64[8];
- };
-
- union __m512i
- {
- private:
- int8_t m512i_i8[64];
- int16_t m512i_i16[32];
- int32_t m512i_i32[16];
- int64_t m512i_i64[8];
- uint8_t m512i_u8[64];
- uint16_t m512i_u16[32];
- uint32_t m512i_u32[16];
- uint64_t m512i_u64[8];
- };
-
- using __mmask16 = uint16_t;
-#endif
-
-#if defined(__INTEL_COMPILER) || (SIMD_ARCH >= SIMD_ARCH_AVX512)
-#define SIMD_ALIGNMENT_BYTES 64
-#else
-#define SIMD_ALIGNMENT_BYTES 32
-#endif
-
- union Float
- {
- SIMDINLINE Float() = default;
- SIMDINLINE Float(__m512 in) : v(in) {}
- SIMDINLINE Float(SIMD256Impl::Float const& in_lo,
- SIMD256Impl::Float const& in_hi = _mm256_setzero_ps())
- {
- v8[0] = in_lo;
- v8[1] = in_hi;
- }
- SIMDINLINE Float& SIMDCALL operator=(__m512 in)
- {
- v = in;
- return *this;
- }
- SIMDINLINE Float& SIMDCALL operator=(Float const& in)
- {
-#if SIMD_ARCH >= SIMD_ARCH_AVX512
- v = in.v;
-#else
- v8[0] = in.v8[0];
- v8[1] = in.v8[1];
-#endif
- return *this;
- }
- SIMDINLINE SIMDCALL operator __m512() const { return v; }
-
- SIMDALIGN(__m512, SIMD_ALIGNMENT_BYTES) v;
- SIMD256Impl::Float v8[2];
- };
-
- union Integer
- {
- SIMDINLINE Integer() = default;
- SIMDINLINE Integer(__m512i in) : v(in) {}
- SIMDINLINE Integer(SIMD256Impl::Integer const& in_lo,
- SIMD256Impl::Integer const& in_hi = _mm256_setzero_si256())
- {
- v8[0] = in_lo;
- v8[1] = in_hi;
- }
- SIMDINLINE Integer& SIMDCALL operator=(__m512i in)
- {
- v = in;
- return *this;
- }
- SIMDINLINE Integer& SIMDCALL operator=(Integer const& in)
- {
-#if SIMD_ARCH >= SIMD_ARCH_AVX512
- v = in.v;
-#else
- v8[0] = in.v8[0];
- v8[1] = in.v8[1];
-#endif
- return *this;
- }
-
- SIMDINLINE SIMDCALL operator __m512i() const { return v; }
-
- SIMDALIGN(__m512i, SIMD_ALIGNMENT_BYTES) v;
- SIMD256Impl::Integer v8[2];
- };
-
- union Double
- {
- SIMDINLINE Double() = default;
- SIMDINLINE Double(__m512d in) : v(in) {}
- SIMDINLINE Double(SIMD256Impl::Double const& in_lo,
- SIMD256Impl::Double const& in_hi = _mm256_setzero_pd())
- {
- v8[0] = in_lo;
- v8[1] = in_hi;
- }
- SIMDINLINE Double& SIMDCALL operator=(__m512d in)
- {
- v = in;
- return *this;
- }
- SIMDINLINE Double& SIMDCALL operator=(Double const& in)
- {
-#if SIMD_ARCH >= SIMD_ARCH_AVX512
- v = in.v;
-#else
- v8[0] = in.v8[0];
- v8[1] = in.v8[1];
-#endif
- return *this;
- }
-
- SIMDINLINE SIMDCALL operator __m512d() const { return v; }
-
- SIMDALIGN(__m512d, SIMD_ALIGNMENT_BYTES) v;
- SIMD256Impl::Double v8[2];
- };
-
- typedef SIMDImpl::Vec4<Float, Integer, Double> SIMDALIGN(Vec4, 64);
- using Mask = __mmask16;
-
- static const uint32_t SIMD_WIDTH = 16;
-
-#undef SIMD_ALIGNMENT_BYTES
- } // namespace SIMD512Impl
-} // namespace SIMDImpl
diff --git a/src/gallium/drivers/swr/rasterizer/common/swr_assert.cpp b/src/gallium/drivers/swr/rasterizer/common/swr_assert.cpp
deleted file mode 100644
index 0f5382044c2..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/swr_assert.cpp
+++ /dev/null
@@ -1,299 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-
-#include "common/os.h"
-#include <stdarg.h>
-#include <stdio.h>
-#include <assert.h>
-#include <algorithm>
-#include <mutex>
-
-#if SWR_ENABLE_ASSERTS || SWR_ENABLE_REL_ASSERTS
-
-#if defined(_MSC_VER)
-#pragma comment(lib, "user32.lib")
-#endif // _MSC_VER
-
-namespace ConsoleUtils
-{
- enum class TextColor
- {
- BLACK = 0,
-#if defined(_WIN32)
- RED = 4,
- GREEN = 2,
- BLUE = 1,
-#else
- RED = 1,
- GREEN = 2,
- BLUE = 4,
-#endif // _WIN32
- PURPLE = static_cast<uint32_t>(RED) | static_cast<uint32_t>(BLUE),
- CYAN = static_cast<uint32_t>(GREEN) | static_cast<uint32_t>(BLUE),
- YELLOW = static_cast<uint32_t>(RED) | static_cast<uint32_t>(GREEN),
- WHITE =
- static_cast<uint32_t>(RED) | static_cast<uint32_t>(GREEN) | static_cast<uint32_t>(BLUE),
- };
-
- enum class TextStyle
- {
- NORMAL = 0,
- INTENSITY = 1,
- };
-
- void SetTextColor(FILE* stream,
- TextColor color = TextColor::WHITE,
- TextStyle style = TextStyle::NORMAL)
- {
-#if defined(_WIN32)
-
- HANDLE hConsoleHandle = nullptr;
- if (stream == stderr)
- {
- hConsoleHandle = GetStdHandle(STD_ERROR_HANDLE);
- }
- else if (stream == stdout)
- {
- hConsoleHandle = GetStdHandle(STD_OUTPUT_HANDLE);
- }
- else
- {
- // Not a console stream, do nothing
- return;
- }
-
- WORD textAttributes = static_cast<WORD>(color);
- if (style == TextStyle::INTENSITY)
- {
- textAttributes |= FOREGROUND_INTENSITY;
- }
- SetConsoleTextAttribute(hConsoleHandle, textAttributes);
-
-#else // !_WIN32
-
- // Print ANSI codes
- uint32_t cc =
- 30 + ((style == TextStyle::INTENSITY) ? 60 : 0) + static_cast<uint32_t>(color);
- fprintf(stream, "\033[0m\033[%d;%dm", static_cast<uint32_t>(style), cc);
-
-#endif
- }
-
- void ResetTextColor(FILE* stream)
- {
-#if defined(_WIN32)
-
- SetTextColor(stream);
-
-#else // !_WIN32
-
- // Print ANSI codes
- fprintf(stream, "\033[0m");
-
-#endif
- }
-
- static std::mutex g_stderrMutex;
-} // namespace ConsoleUtils
-
-bool SwrAssert(bool chkDebugger,
- bool& enabled,
- const char* pExpression,
- const char* pFileName,
- uint32_t lineNum,
- const char* pFunction,
- const char* pFmtString,
- ...)
-{
- using namespace ConsoleUtils;
- std::lock_guard<std::mutex> l(g_stderrMutex);
-
- SetTextColor(stderr, TextColor::CYAN, TextStyle::NORMAL);
-
- fprintf(stderr, "%s(%d): ", pFileName, lineNum);
-
- SetTextColor(stderr, TextColor::RED, TextStyle::INTENSITY);
-
- fprintf(stderr, "ASSERT: %s\n", pExpression);
-
- SetTextColor(stderr, TextColor::CYAN, TextStyle::INTENSITY);
- fprintf(stderr, "\t%s\n", pFunction);
-
- if (pFmtString)
- {
- SetTextColor(stderr, TextColor::YELLOW, TextStyle::INTENSITY);
- fprintf(stderr, "\t");
- va_list args;
- va_start(args, pFmtString);
- vfprintf(stderr, pFmtString, args);
- va_end(args);
- fprintf(stderr, "\n");
- }
- ResetTextColor(stderr);
- fflush(stderr);
-
-#if defined(_WIN32)
- static const int MAX_MESSAGE_LEN = 2048;
- char msgBuf[MAX_MESSAGE_LEN];
-
- sprintf_s(msgBuf, "%s(%d): ASSERT: %s\n", pFileName, lineNum, pExpression);
- msgBuf[MAX_MESSAGE_LEN - 2] = '\n';
- msgBuf[MAX_MESSAGE_LEN - 1] = 0;
- OutputDebugStringA(msgBuf);
-
- sprintf_s(msgBuf, "\t%s\n", pFunction);
- msgBuf[MAX_MESSAGE_LEN - 2] = '\n';
- msgBuf[MAX_MESSAGE_LEN - 1] = 0;
- OutputDebugStringA(msgBuf);
-
- int offset = 0;
-
- if (pFmtString)
- {
- va_list args;
- va_start(args, pFmtString);
- offset = _vsnprintf_s(msgBuf, sizeof(msgBuf), sizeof(msgBuf), pFmtString, args);
- va_end(args);
-
- if (offset < 0)
- {
- return true;
- }
-
- OutputDebugStringA("\t");
- OutputDebugStringA(msgBuf);
- OutputDebugStringA("\n");
- }
-
- if (enabled && KNOB_ENABLE_ASSERT_DIALOGS)
- {
- int retval = sprintf_s(&msgBuf[offset],
- MAX_MESSAGE_LEN - offset,
- "\n\n"
- "File: %s\n"
- "Line: %d\n"
- "\n"
- "Expression: %s\n\n"
- "Cancel: Disable this assert for the remainder of the process\n"
- "Try Again: Break into the debugger\n"
- "Continue: Continue execution (but leave assert enabled)",
- pFileName,
- lineNum,
- pExpression);
-
- if (retval < 0)
- {
- return true;
- }
-
- offset += retval;
-
- if (!IsDebuggerPresent())
- {
- sprintf_s(&msgBuf[offset],
- MAX_MESSAGE_LEN - offset,
- "\n\n*** NO DEBUGGER DETECTED ***\n\nPressing \"Try Again\" will cause a "
- "program crash!");
- }
-
- retval = MessageBoxA(nullptr,
- msgBuf,
- "Assert Failed",
- MB_CANCELTRYCONTINUE | MB_ICONEXCLAMATION | MB_SETFOREGROUND);
-
- switch (retval)
- {
- case IDCANCEL:
- enabled = false;
- return false;
-
- case IDTRYAGAIN:
- return true;
-
- case IDCONTINUE:
- return false;
- }
- }
- else
- {
- return (IsDebuggerPresent() || !chkDebugger) && enabled;
- }
-#endif // _WIN32
-
- return enabled;
-}
-
-void SwrTrace(
- const char* pFileName, uint32_t lineNum, const char* pFunction, const char* pFmtString, ...)
-{
- using namespace ConsoleUtils;
- std::lock_guard<std::mutex> l(g_stderrMutex);
-
- SetTextColor(stderr, TextColor::CYAN, TextStyle::NORMAL);
-
- fprintf(stderr, "%s(%d): TRACE in %s:\n", pFileName, lineNum, pFunction);
-
- if (pFmtString)
- {
- SetTextColor(stderr, TextColor::PURPLE, TextStyle::INTENSITY);
- fprintf(stderr, "\t");
- va_list args;
- va_start(args, pFmtString);
- vfprintf(stderr, pFmtString, args);
- va_end(args);
- fprintf(stderr, "\n");
- }
- ResetTextColor(stderr);
- fflush(stderr);
-
-#if defined(_WIN32)
- static const int MAX_MESSAGE_LEN = 2048;
- char msgBuf[MAX_MESSAGE_LEN];
-
- sprintf_s(msgBuf, "%s(%d): TRACE in %s\n", pFileName, lineNum, pFunction);
- msgBuf[MAX_MESSAGE_LEN - 2] = '\n';
- msgBuf[MAX_MESSAGE_LEN - 1] = 0;
- OutputDebugStringA(msgBuf);
-
- int offset = 0;
-
- if (pFmtString)
- {
- va_list args;
- va_start(args, pFmtString);
- offset = _vsnprintf_s(msgBuf, sizeof(msgBuf), sizeof(msgBuf), pFmtString, args);
- va_end(args);
-
- if (offset < 0)
- {
- return;
- }
-
- OutputDebugStringA("\t");
- OutputDebugStringA(msgBuf);
- OutputDebugStringA("\n");
- }
-#endif // _WIN32
-}
-
-#endif // SWR_ENABLE_ASSERTS || SWR_ENABLE_REL_ASSERTS
diff --git a/src/gallium/drivers/swr/rasterizer/common/swr_assert.h b/src/gallium/drivers/swr/rasterizer/common/swr_assert.h
deleted file mode 100644
index cd9854f2549..00000000000
--- a/src/gallium/drivers/swr/rasterizer/common/swr_assert.h
+++ /dev/null
@@ -1,242 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-
-#ifndef __SWR_ASSERT_H__
-#define __SWR_ASSERT_H__
-
-#if !defined(__SWR_OS_H__)
-#error swr_assert.h should not be included directly, please include "common/os.h" instead.
-#endif
-
-//=============================================================================
-//
-// MACROS defined in this file:
-//
-// - SWR_ASSUME(expression, ...): Tell compiler that the expression is true.
-// Helps with static code analysis as well.
-// DO NOT USE if code after this dynamically
-// checks for errors and handles them. The
-// compiler may optimize out the error check.
-//
-// - SWR_ASSERT(expression, ...): Inform the user if expression is false.
-// This check is only conditionally made,
-// usually only in debug mode.
-//
-// - SWR_REL_ASSERT(expression, ...): Unconditionally enabled version of SWR_ASSERT
-//
-// - SWR_ASSUME_ASSERT(expression, ...): Conditionally enabled SWR_ASSERT. Uses
-// SWR_ASSUME if SWR_ASSERT is disabled.
-// DO NOT USE in combination with actual
-// error checking (see SWR_ASSUME)
-//
-// - SWR_REL_ASSUME_ASSERT(expression, ...): Same as SWR_REL_ASSERT.
-//
-//=============================================================================
-
-// Stupid preprocessor tricks to avoid -Wall / -W4 warnings
-#if defined(_MSC_VER)
-#define _SWR_WARN_DISABLE __pragma(warning(push)) __pragma(warning(disable : 4127))
-#define _SWR_WARN_RESTORE __pragma(warning(pop))
-#else // ! MSVC compiler
-#define _SWR_WARN_DISABLE
-#define _SWR_WARN_RESTORE
-#endif
-
-#define _SWR_MACRO_START \
- do \
- {
-#define _SWR_MACRO_END \
- _SWR_WARN_DISABLE \
- } \
- while (0) \
- _SWR_WARN_RESTORE
-
-#if defined(_MSC_VER)
-#define SWR_ASSUME(e, ...) \
- _SWR_MACRO_START __assume(e); \
- _SWR_MACRO_END
-#elif defined(__clang__)
-#define SWR_ASSUME(e, ...) \
- _SWR_MACRO_START __builtin_assume(e); \
- _SWR_MACRO_END
-#elif defined(__GNUC__)
-#define SWR_ASSUME(e, ...) \
- _SWR_MACRO_START((e) ? ((void)0) : __builtin_unreachable()); \
- _SWR_MACRO_END
-#else
-#define SWR_ASSUME(e, ...) \
- _SWR_MACRO_START ASSUME(e); \
- _SWR_MACRO_END
-#endif
-
-#if !defined(SWR_ENABLE_ASSERTS)
-
-#if !defined(NDEBUG)
-#define SWR_ENABLE_ASSERTS 1
-#else
-#define SWR_ENABLE_ASSERTS 0
-#endif // !defined(NDEBUG)
-
-#endif // SWR_ENABLE_ASSERTS
-
-#if !defined(SWR_ENABLE_REL_ASSERTS)
-#define SWR_ENABLE_REL_ASSERTS 1
-#endif
-
-#if SWR_ENABLE_ASSERTS || SWR_ENABLE_REL_ASSERTS
-#include "assert.h"
-
-#if !defined(__cplusplus)
-
-#pragma message("C++ is required for SWR Asserts, falling back to assert.h")
-
-#if SWR_ENABLE_ASSERTS
-#define SWR_ASSERT(e, ...) assert(e)
-#endif
-
-#if SWR_ENABLE_REL_ASSERTS
-#define SWR_REL_ASSERT(e, ...) assert(e)
-#endif
-
-#else
-
-bool SwrAssert(bool chkDebugger,
- bool& enabled,
- const char* pExpression,
- const char* pFileName,
- uint32_t lineNum,
- const char* function,
- const char* pFmtString = nullptr,
- ...);
-
-void SwrTrace(
- const char* pFileName, uint32_t lineNum, const char* function, const char* pFmtString, ...);
-
-#define _SWR_ASSERT(chkDebugger, e, ...) \
- _SWR_MACRO_START \
- bool expFailed = !(e); \
- if (expFailed) \
- { \
- static bool swrAssertEnabled = true; \
- expFailed = SwrAssert( \
- chkDebugger, swrAssertEnabled, #e, __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__); \
- if (expFailed) \
- { \
- DEBUGBREAK; \
- } \
- } \
- _SWR_MACRO_END
-
-#define _SWR_INVALID(chkDebugger, ...) \
- _SWR_MACRO_START \
- static bool swrAssertEnabled = true; \
- bool expFailed = SwrAssert( \
- chkDebugger, swrAssertEnabled, "", __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__); \
- if (expFailed) \
- { \
- DEBUGBREAK; \
- } \
- _SWR_MACRO_END
-
-#define _SWR_TRACE(_fmtstr, ...) SwrTrace(__FILE__, __LINE__, __FUNCTION__, _fmtstr, ##__VA_ARGS__);
-
-#if SWR_ENABLE_ASSERTS
-#define SWR_ASSERT(e, ...) _SWR_ASSERT(true, e, ##__VA_ARGS__)
-#define SWR_ASSUME_ASSERT(e, ...) SWR_ASSERT(e, ##__VA_ARGS__)
-#define SWR_TRACE(_fmtstr, ...) _SWR_TRACE(_fmtstr, ##__VA_ARGS__)
-#endif // SWR_ENABLE_ASSERTS
-
-#if SWR_ENABLE_REL_ASSERTS
-#define SWR_REL_ASSERT(e, ...) _SWR_ASSERT(false, e, ##__VA_ARGS__)
-#define SWR_REL_ASSUME_ASSERT(e, ...) SWR_REL_ASSERT(e, ##__VA_ARGS__)
-#define SWR_REL_TRACE(_fmtstr, ...) _SWR_TRACE(_fmtstr, ##__VA_ARGS__)
-
-// SWR_INVALID is always enabled
-// Funky handling to allow 0 arguments with g++/gcc
-// This is needed because you can't "swallow commas" with ##__VA_ARGS__ unless
-// there is a first argument to the macro. So having a macro that can optionally
-// accept 0 arguments is tricky.
-#define _SWR_INVALID_0() _SWR_INVALID(false)
-#define _SWR_INVALID_1(...) _SWR_INVALID(false, ##__VA_ARGS__)
-#define _SWR_INVALID_VARGS_(_10, _9, _8, _7, _6, _5, _4, _3, _2, _1, N, ...) N
-#define _SWR_INVALID_VARGS(...) _SWR_INVALID_VARGS_(__VA_ARGS__, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
-#define _SWR_INVALID_VARGS_0() 1, 2, 3, 4, 5, 6, 7, 9, 9, 10
-#define _SWR_INVALID_CONCAT_(a, b) a##b
-#define _SWR_INVALID_CONCAT(a, b) _SWR_INVALID_CONCAT_(a, b)
-#define SWR_INVALID(...) \
- _SWR_INVALID_CONCAT(_SWR_INVALID_, _SWR_INVALID_VARGS(_SWR_INVALID_VARGS_0 __VA_ARGS__())) \
- (__VA_ARGS__)
-
-#define SWR_STATIC_ASSERT(expression, ...) \
- static_assert((expression), "Failed:\n " #expression "\n " __VA_ARGS__);
-
-#endif // SWR_ENABLE_REL_ASSERTS
-
-#endif // C++
-
-#endif // SWR_ENABLE_ASSERTS || SWR_ENABLE_REL_ASSERTS
-
-// Needed to allow passing bitfield members to sizeof() in disabled asserts
-template <typename T>
-static bool SwrSizeofWorkaround(T)
-{
- return false;
-}
-
-#if !SWR_ENABLE_ASSERTS
-#define SWR_ASSERT(e, ...) \
- _SWR_MACRO_START(void) sizeof(SwrSizeofWorkaround(e)); \
- _SWR_MACRO_END
-#define SWR_ASSUME_ASSERT(e, ...) SWR_ASSUME(e, ##__VA_ARGS__)
-#define SWR_TRACE(_fmtstr, ...) \
- _SWR_MACRO_START(void)(0); \
- _SWR_MACRO_END
-#endif
-
-#if !SWR_ENABLE_REL_ASSERTS
-#define SWR_REL_ASSERT(e, ...) \
- _SWR_MACRO_START(void) sizeof(SwrSizeofWorkaround(e)); \
- _SWR_MACRO_END
-#define SWR_INVALID(...) \
- _SWR_MACRO_START(void)(0); \
- _SWR_MACRO_END
-#define SWR_REL_ASSUME_ASSERT(e, ...) SWR_ASSUME(e, ##__VA_ARGS__)
-#define SWR_REL_TRACE(_fmtstr, ...) \
- _SWR_MACRO_START(void)(0); \
- _SWR_MACRO_END
-#define SWR_STATIC_ASSERT(e, ...) \
- _SWR_MACRO_START(void) sizeof(SwrSizeofWorkaround(e)); \
- _SWR_MACRO_END
-#endif
-
-#if defined(_MSC_VER)
-#define SWR_FUNCTION_DECL __FUNCSIG__
-#elif (defined(__GNUC__) || defined(__clang__))
-#define SWR_FUNCTION_DECL __PRETTY_FUNCTION__
-#else
-#define SWR_FUNCTION_DECL __FUNCTION__
-#endif
-
-#define SWR_NOT_IMPL SWR_INVALID("%s not implemented", SWR_FUNCTION_DECL)
-
-#endif //__SWR_ASSERT_H__
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
deleted file mode 100644
index bee257d7723..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ /dev/null
@@ -1,1802 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file api.cpp
- *
- * @brief API implementation
- *
- ******************************************************************************/
-
-#include <cfloat>
-#include <cmath>
-#include <cstdio>
-#include <new>
-
-#include "core/api.h"
-#include "core/backend.h"
-#include "core/context.h"
-#include "core/depthstencil.h"
-#include "core/frontend.h"
-#include "core/rasterizer.h"
-#include "core/rdtsc_core.h"
-#include "core/threads.h"
-#include "core/tilemgr.h"
-#include "core/clip.h"
-#include "core/utils.h"
-#include "core/tileset.h"
-
-#include "common/os.h"
-
-static const SWR_RECT g_MaxScissorRect = {0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y};
-
-void SetupDefaultState(SWR_CONTEXT* pContext);
-
-static INLINE SWR_CONTEXT* GetContext(HANDLE hContext)
-{
- return (SWR_CONTEXT*)hContext;
-}
-
-void WakeAllThreads(SWR_CONTEXT* pContext)
-{
- pContext->FifosNotEmpty.notify_all();
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Create SWR Context.
-/// @param pCreateInfo - pointer to creation info.
-HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
-{
- void* pContextMem = AlignedMalloc(sizeof(SWR_CONTEXT), KNOB_SIMD_WIDTH * 4);
- memset(pContextMem, 0, sizeof(SWR_CONTEXT));
- SWR_CONTEXT* pContext = new (pContextMem) SWR_CONTEXT();
-
- pContext->privateStateSize = pCreateInfo->privateStateSize;
-
- // initialize callback functions
- pContext->pfnLoadTile = pCreateInfo->pfnLoadTile;
- pContext->pfnStoreTile = pCreateInfo->pfnStoreTile;
- pContext->pfnTranslateGfxptrForRead = pCreateInfo->pfnTranslateGfxptrForRead;
- pContext->pfnTranslateGfxptrForWrite = pCreateInfo->pfnTranslateGfxptrForWrite;
- pContext->pfnMakeGfxPtr = pCreateInfo->pfnMakeGfxPtr;
- pContext->pfnCreateMemoryContext = pCreateInfo->pfnCreateMemoryContext;
- pContext->pfnDestroyMemoryContext = pCreateInfo->pfnDestroyMemoryContext;
- pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
- pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
- pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE;
- pContext->pfnUpdateStreamOut = pCreateInfo->pfnUpdateStreamOut;
-
-
- pContext->hExternalMemory = pCreateInfo->hExternalMemory;
-
- pContext->MAX_DRAWS_IN_FLIGHT = KNOB_MAX_DRAWS_IN_FLIGHT;
- if (pCreateInfo->MAX_DRAWS_IN_FLIGHT != 0)
- {
- pContext->MAX_DRAWS_IN_FLIGHT = pCreateInfo->MAX_DRAWS_IN_FLIGHT;
- }
-
- pContext->dcRing.Init(pContext->MAX_DRAWS_IN_FLIGHT);
- pContext->dsRing.Init(pContext->MAX_DRAWS_IN_FLIGHT);
-
- pContext->pMacroTileManagerArray =
- (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * pContext->MAX_DRAWS_IN_FLIGHT, 64);
- pContext->pDispatchQueueArray =
- (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * pContext->MAX_DRAWS_IN_FLIGHT, 64);
-
- for (uint32_t dc = 0; dc < pContext->MAX_DRAWS_IN_FLIGHT; ++dc)
- {
- pContext->dcRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
- new (&pContext->pMacroTileManagerArray[dc]) MacroTileMgr(*pContext->dcRing[dc].pArena);
- new (&pContext->pDispatchQueueArray[dc]) DispatchQueue();
-
- pContext->dsRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
- }
-
- if (pCreateInfo->pThreadInfo)
- {
- pContext->threadInfo = *pCreateInfo->pThreadInfo;
- }
- else
- {
- pContext->threadInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS;
- pContext->threadInfo.BASE_NUMA_NODE = KNOB_BASE_NUMA_NODE;
- pContext->threadInfo.BASE_CORE = KNOB_BASE_CORE;
- pContext->threadInfo.BASE_THREAD = KNOB_BASE_THREAD;
- pContext->threadInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES;
- pContext->threadInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE;
- pContext->threadInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE;
- pContext->threadInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED;
- }
-
- if (pCreateInfo->pApiThreadInfo)
- {
- pContext->apiThreadInfo = *pCreateInfo->pApiThreadInfo;
- }
- else
- {
- pContext->apiThreadInfo.bindAPIThread0 = true;
- pContext->apiThreadInfo.numAPIReservedThreads = 1;
- pContext->apiThreadInfo.numAPIThreadsPerCore = 1;
- }
-
- if (pCreateInfo->pWorkerPrivateState)
- {
- pContext->workerPrivateState = *pCreateInfo->pWorkerPrivateState;
- }
-
- memset((void*)&pContext->WaitLock, 0, sizeof(pContext->WaitLock));
- memset((void*)&pContext->FifosNotEmpty, 0, sizeof(pContext->FifosNotEmpty));
- new (&pContext->WaitLock) std::mutex();
- new (&pContext->FifosNotEmpty) std::condition_variable();
-
- CreateThreadPool(pContext, &pContext->threadPool);
-
- if (pContext->apiThreadInfo.bindAPIThread0)
- {
- BindApiThread(pContext, 0);
- }
-
- if (pContext->threadInfo.SINGLE_THREADED)
- {
- pContext->pSingleThreadLockedTiles = new TileSet();
- }
-
- pContext->ppScratch = new uint8_t*[pContext->NumWorkerThreads];
- pContext->pStats =
- (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * pContext->NumWorkerThreads, 64);
-
-#if defined(KNOB_ENABLE_AR)
- // Setup ArchRast thread contexts which includes +1 for API thread.
- pContext->pArContext = new HANDLE[pContext->NumWorkerThreads + 1];
- pContext->pArContext[pContext->NumWorkerThreads] =
- ArchRast::CreateThreadContext(ArchRast::AR_THREAD::API);
-#endif
-
-#if defined(KNOB_ENABLE_RDTSC)
- pContext->pBucketMgr = new BucketManager(pCreateInfo->contextName);
- RDTSC_RESET(pContext->pBucketMgr);
- RDTSC_INIT(pContext->pBucketMgr, 0);
-#endif
-
- // Allocate scratch space for workers.
- ///@note We could lazily allocate this but its rather small amount of memory.
- for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
- {
-#if defined(_WIN32)
- uint32_t numaNode =
- pContext->threadPool.pThreadData ? pContext->threadPool.pThreadData[i].numaId : 0;
- pContext->ppScratch[i] = (uint8_t*)VirtualAllocExNuma(GetCurrentProcess(),
- nullptr,
- KNOB_WORKER_SCRATCH_SPACE_SIZE,
- MEM_RESERVE | MEM_COMMIT,
- PAGE_READWRITE,
- numaNode);
-#else
- pContext->ppScratch[i] =
- (uint8_t*)AlignedMalloc(KNOB_WORKER_SCRATCH_SPACE_SIZE, KNOB_SIMD_WIDTH * 4);
-#endif
-
-#if defined(KNOB_ENABLE_AR)
- // Initialize worker thread context for ArchRast.
- pContext->pArContext[i] = ArchRast::CreateThreadContext(ArchRast::AR_THREAD::WORKER);
-
- SWR_WORKER_DATA* pWorkerData = (SWR_WORKER_DATA*)pContext->threadPool.pThreadData[i].pWorkerPrivateData;
- pWorkerData->hArContext = pContext->pArContext[i];
-#endif
-
-
- }
-
-#if defined(KNOB_ENABLE_AR)
- // cache the API thread event manager, for use with sim layer
- pCreateInfo->hArEventManager = pContext->pArContext[pContext->NumWorkerThreads];
-#endif
-
- // State setup AFTER context is fully initialized
- SetupDefaultState(pContext);
-
- // initialize hot tile manager
- pContext->pHotTileMgr = new HotTileMgr();
-
- // pass pointer to bucket manager back to caller
-#ifdef KNOB_ENABLE_RDTSC
- pCreateInfo->pBucketMgr = pContext->pBucketMgr;
-#endif
-
- pCreateInfo->contextSaveSize = sizeof(API_STATE);
-
- StartThreadPool(pContext, &pContext->threadPool);
-
- return (HANDLE)pContext;
-}
-
-void CopyState(DRAW_STATE& dst, const DRAW_STATE& src)
-{
- memcpy((void*)&dst.state, (void*)&src.state, sizeof(API_STATE));
-}
-
-template <bool IsDraw>
-void QueueWork(SWR_CONTEXT* pContext)
-{
- DRAW_CONTEXT* pDC = pContext->pCurDrawContext;
- uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT;
-
- if (IsDraw)
- {
- pDC->pTileMgr = &pContext->pMacroTileManagerArray[dcIndex];
- pDC->pTileMgr->initialize();
- }
-
- // Each worker thread looks at a DC for both FE and BE work at different times and so we
- // multiply threadDone by 2. When the threadDone counter has reached 0 then all workers
- // have moved past this DC. (i.e. Each worker has checked this DC for both FE and BE work and
- // then moved on if all work is done.)
- pContext->pCurDrawContext->threadsDone = pContext->NumFEThreads + pContext->NumBEThreads;
-
- if (IsDraw)
- {
- InterlockedIncrement(&pContext->drawsOutstandingFE);
- }
-
- _ReadWriteBarrier();
- {
- std::unique_lock<std::mutex> lock(pContext->WaitLock);
- pContext->dcRing.Enqueue();
- }
-
- if (pContext->threadInfo.SINGLE_THREADED)
- {
- uint32_t mxcsr = SetOptimalVectorCSR();
-
- if (IsDraw)
- {
- uint32_t curDraw[2] = {pContext->pCurDrawContext->drawId,
- pContext->pCurDrawContext->drawId};
- WorkOnFifoFE(pContext, 0, curDraw[0]);
- WorkOnFifoBE(pContext, 0, curDraw[1], *pContext->pSingleThreadLockedTiles, 0, 0);
- }
- else
- {
- uint32_t curDispatch = pContext->pCurDrawContext->drawId;
- WorkOnCompute(pContext, 0, curDispatch);
- }
-
- // Dequeue the work here, if not already done, since we're single threaded (i.e. no
- // workers).
- while (CompleteDrawContext(pContext, pContext->pCurDrawContext) > 0)
- {
- }
-
- // restore csr
- RestoreVectorCSR(mxcsr);
- }
- else
- {
- RDTSC_BEGIN(pContext->pBucketMgr, APIDrawWakeAllThreads, pDC->drawId);
- WakeAllThreads(pContext);
- RDTSC_END(pContext->pBucketMgr, APIDrawWakeAllThreads, 1);
- }
-
- // Set current draw context to NULL so that next state call forces a new draw context to be
- // created and populated.
- pContext->pPrevDrawContext = pContext->pCurDrawContext;
- pContext->pCurDrawContext = nullptr;
-}
-
-INLINE void QueueDraw(SWR_CONTEXT* pContext)
-{
- QueueWork<true>(pContext);
-}
-
-INLINE void QueueDispatch(SWR_CONTEXT* pContext)
-{
- QueueWork<false>(pContext);
-}
-
-DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT* pContext, bool isSplitDraw = false)
-{
- RDTSC_BEGIN(pContext->pBucketMgr, APIGetDrawContext, 0);
- // If current draw context is null then need to obtain a new draw context to use from ring.
- if (pContext->pCurDrawContext == nullptr)
- {
- // Need to wait for a free entry.
- while (pContext->dcRing.IsFull())
- {
- _mm_pause();
- }
-
- uint64_t curDraw = pContext->dcRing.GetHead();
- uint32_t dcIndex = curDraw % pContext->MAX_DRAWS_IN_FLIGHT;
-
- if ((pContext->frameCount - pContext->lastFrameChecked) > 2 ||
- (curDraw - pContext->lastDrawChecked) > 0x10000)
- {
- // Take this opportunity to clean-up old arena allocations
- pContext->cachingArenaAllocator.FreeOldBlocks();
-
- pContext->lastFrameChecked = pContext->frameCount;
- pContext->lastDrawChecked = curDraw;
- }
-
- DRAW_CONTEXT* pCurDrawContext = &pContext->dcRing[dcIndex];
- pContext->pCurDrawContext = pCurDrawContext;
-
- // Assign next available entry in DS ring to this DC.
- uint32_t dsIndex = pContext->curStateId % pContext->MAX_DRAWS_IN_FLIGHT;
- pCurDrawContext->pState = &pContext->dsRing[dsIndex];
-
- // Copy previous state to current state.
- if (pContext->pPrevDrawContext)
- {
- DRAW_CONTEXT* pPrevDrawContext = pContext->pPrevDrawContext;
-
- // If we're splitting our draw then we can just use the same state from the previous
- // draw. In this case, we won't increment the DS ring index so the next non-split
- // draw can receive the state.
- if (isSplitDraw == false)
- {
- CopyState(*pCurDrawContext->pState, *pPrevDrawContext->pState);
-
- // Should have been cleaned up previously
- SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true);
-
- pCurDrawContext->pState->pPrivateState = nullptr;
-
- pContext->curStateId++; // Progress state ring index forward.
- }
- else
- {
- // If its a split draw then just copy the state pointer over
- // since its the same draw.
- pCurDrawContext->pState = pPrevDrawContext->pState;
- SWR_ASSERT(pPrevDrawContext->cleanupState == false);
- }
- }
- else
- {
- SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true);
- pContext->curStateId++; // Progress state ring index forward.
- }
-
- SWR_ASSERT(pCurDrawContext->pArena->IsEmpty() == true);
-
- // Reset dependency
- pCurDrawContext->dependent = false;
- pCurDrawContext->dependentFE = false;
-
- pCurDrawContext->pContext = pContext;
- pCurDrawContext->isCompute = false; // Dispatch has to set this to true.
-
- pCurDrawContext->doneFE = false;
- pCurDrawContext->FeLock = 0;
- pCurDrawContext->threadsDone = 0;
- pCurDrawContext->retireCallback.pfnCallbackFunc = nullptr;
-
- pCurDrawContext->dynState.Reset(pContext->NumWorkerThreads);
-
- // Assign unique drawId for this DC
- pCurDrawContext->drawId = pContext->dcRing.GetHead();
-
- pCurDrawContext->cleanupState = true;
- }
- else
- {
- SWR_ASSERT(isSplitDraw == false, "Split draw should only be used when obtaining a new DC");
- }
-
- RDTSC_END(pContext->pBucketMgr, APIGetDrawContext, 0);
- return pContext->pCurDrawContext;
-}
-
-API_STATE* GetDrawState(SWR_CONTEXT* pContext)
-{
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- SWR_ASSERT(pDC->pState != nullptr);
-
- return &pDC->pState->state;
-}
-
-void SwrDestroyContext(HANDLE hContext)
-{
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-
- pDC->FeWork.type = SHUTDOWN;
- pDC->FeWork.pfnWork = ProcessShutdown;
-
- // enqueue
- QueueDraw(pContext);
-
- DestroyThreadPool(pContext, &pContext->threadPool);
-
- // free the fifos
- for (uint32_t i = 0; i < pContext->MAX_DRAWS_IN_FLIGHT; ++i)
- {
- AlignedFree(pContext->dcRing[i].dynState.pStats);
- delete pContext->dcRing[i].pArena;
- delete pContext->dsRing[i].pArena;
- pContext->pMacroTileManagerArray[i].~MacroTileMgr();
- pContext->pDispatchQueueArray[i].~DispatchQueue();
- }
-
- AlignedFree(pContext->pDispatchQueueArray);
- AlignedFree(pContext->pMacroTileManagerArray);
-
- // Free scratch space.
- for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
- {
-#if defined(_WIN32)
- VirtualFree(pContext->ppScratch[i], 0, MEM_RELEASE);
-#else
- AlignedFree(pContext->ppScratch[i]);
-#endif
-
-#if defined(KNOB_ENABLE_AR)
- ArchRast::DestroyThreadContext(pContext->pArContext[i]);
-#endif
- }
-
-#if defined(KNOB_ENABLE_RDTSC)
- delete pContext->pBucketMgr;
-#endif
-
- delete[] pContext->ppScratch;
- AlignedFree(pContext->pStats);
-
- delete pContext->pHotTileMgr;
- delete pContext->pSingleThreadLockedTiles;
-
- pContext->~SWR_CONTEXT();
- AlignedFree(GetContext(hContext));
-}
-
-void SwrBindApiThread(HANDLE hContext, uint32_t apiThreadId)
-{
- SWR_CONTEXT* pContext = GetContext(hContext);
- BindApiThread(pContext, apiThreadId);
-}
-
-void SWR_API SwrSaveState(HANDLE hContext, void* pOutputStateBlock, size_t memSize)
-{
- SWR_CONTEXT* pContext = GetContext(hContext);
- auto pSrc = GetDrawState(pContext);
- assert(pOutputStateBlock && memSize >= sizeof(*pSrc));
-
- memcpy(pOutputStateBlock, pSrc, sizeof(*pSrc));
-}
-
-void SWR_API SwrRestoreState(HANDLE hContext, const void* pStateBlock, size_t memSize)
-{
- SWR_CONTEXT* pContext = GetContext(hContext);
- auto pDst = GetDrawState(pContext);
- assert(pStateBlock && memSize >= sizeof(*pDst));
-
- memcpy((void*)pDst, (void*)pStateBlock, sizeof(*pDst));
-}
-
-void SetupDefaultState(SWR_CONTEXT* pContext)
-{
- API_STATE* pState = GetDrawState(pContext);
-
- pState->rastState.cullMode = SWR_CULLMODE_NONE;
- pState->rastState.frontWinding = SWR_FRONTWINDING_CCW;
-
- pState->depthBoundsState.depthBoundsTestEnable = false;
- pState->depthBoundsState.depthBoundsTestMinValue = 0.0f;
- pState->depthBoundsState.depthBoundsTestMaxValue = 1.0f;
-}
-
-void SWR_API SwrSync(HANDLE hContext,
- PFN_CALLBACK_FUNC pfnFunc,
- uint64_t userData,
- uint64_t userData2,
- uint64_t userData3)
-{
- SWR_ASSERT(pfnFunc != nullptr);
-
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-
- RDTSC_BEGIN(pContext->pBucketMgr, APISync, 0);
-
- pDC->FeWork.type = SYNC;
- pDC->FeWork.pfnWork = ProcessSync;
-
- // Setup callback function
- pDC->retireCallback.pfnCallbackFunc = pfnFunc;
- pDC->retireCallback.userData = userData;
- pDC->retireCallback.userData2 = userData2;
- pDC->retireCallback.userData3 = userData3;
-
- AR_API_EVENT(SwrSyncEvent(pDC->drawId));
-
- // enqueue
- QueueDraw(pContext);
-
- RDTSC_END(pContext->pBucketMgr, APISync, 1);
-}
-
-void SwrStallBE(HANDLE hContext)
-{
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-
- pDC->dependent = true;
-}
-
-void SwrWaitForIdle(HANDLE hContext)
-{
- SWR_CONTEXT* pContext = GetContext(hContext);
-
- RDTSC_BEGIN(pContext->pBucketMgr, APIWaitForIdle, 0);
-
- while (!pContext->dcRing.IsEmpty())
- {
- _mm_pause();
- }
-
- RDTSC_END(pContext->pBucketMgr, APIWaitForIdle, 1);
-}
-
-void SwrWaitForIdleFE(HANDLE hContext)
-{
- SWR_CONTEXT* pContext = GetContext(hContext);
-
- RDTSC_BEGIN(pContext->pBucketMgr, APIWaitForIdle, 0);
-
- while (pContext->drawsOutstandingFE > 0)
- {
- _mm_pause();
- }
-
- RDTSC_END(pContext->pBucketMgr, APIWaitForIdle, 1);
-}
-
-void SwrSetVertexBuffers(HANDLE hContext,
- uint32_t numBuffers,
- const SWR_VERTEX_BUFFER_STATE* pVertexBuffers)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
-
- for (uint32_t i = 0; i < numBuffers; ++i)
- {
- const SWR_VERTEX_BUFFER_STATE* pVB = &pVertexBuffers[i];
- pState->vertexBuffers[pVB->index] = *pVB;
- }
-}
-
-void SwrSetIndexBuffer(HANDLE hContext, const SWR_INDEX_BUFFER_STATE* pIndexBuffer)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
-
- pState->indexBuffer = *pIndexBuffer;
-}
-
-void SwrSetFetchFunc(HANDLE hContext, PFN_FETCH_FUNC pfnFetchFunc)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
-
- pState->pfnFetchFunc = pfnFetchFunc;
-}
-
-void SwrSetSoFunc(HANDLE hContext, PFN_SO_FUNC pfnSoFunc, uint32_t streamIndex)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
-
- SWR_ASSERT(streamIndex < MAX_SO_STREAMS);
-
- pState->pfnSoFunc[streamIndex] = pfnSoFunc;
-}
-
-void SwrSetSoState(HANDLE hContext, SWR_STREAMOUT_STATE* pSoState)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
-
- pState->soState = *pSoState;
-}
-
-void SwrSetSoBuffers(HANDLE hContext, SWR_STREAMOUT_BUFFER* pSoBuffer, uint32_t slot)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
-
- SWR_ASSERT((slot < MAX_SO_STREAMS), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot);
-
- // remember buffer status in case of future resume StreamOut
- if ((pState->soBuffer[slot].pBuffer != 0) && (pSoBuffer->pBuffer == 0))
- pState->soPausedBuffer[slot] = pState->soBuffer[slot];
-
- // resume
- if (pState->soPausedBuffer[slot].pBuffer == pSoBuffer->pBuffer)
- pState->soBuffer[slot] = pState->soPausedBuffer[slot];
- else
- pState->soBuffer[slot] = *pSoBuffer;
-}
-
-void SwrSetVertexFunc(HANDLE hContext, PFN_VERTEX_FUNC pfnVertexFunc)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
-
- pState->pfnVertexFunc = pfnVertexFunc;
-}
-
-void SwrSetFrontendState(HANDLE hContext, SWR_FRONTEND_STATE* pFEState)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
- pState->frontendState = *pFEState;
-}
-
-void SwrSetGsState(HANDLE hContext, SWR_GS_STATE* pGSState)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
- pState->gsState = *pGSState;
-}
-
-void SwrSetGsFunc(HANDLE hContext, PFN_GS_FUNC pfnGsFunc)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
- pState->pfnGsFunc = pfnGsFunc;
-}
-
-void SwrSetCsFunc(HANDLE hContext,
- PFN_CS_FUNC pfnCsFunc,
- uint32_t totalThreadsInGroup,
- uint32_t totalSpillFillSize,
- uint32_t scratchSpaceSizePerWarp,
- uint32_t numWarps)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
- pState->pfnCsFunc = pfnCsFunc;
- pState->totalThreadsInGroup = totalThreadsInGroup;
- pState->totalSpillFillSize = totalSpillFillSize;
- pState->scratchSpaceSizePerWarp = scratchSpaceSizePerWarp;
- pState->scratchSpaceNumWarps = numWarps;
-}
-
-void SwrSetTsState(HANDLE hContext, SWR_TS_STATE* pState)
-{
- API_STATE* pApiState = GetDrawState(GetContext(hContext));
- pApiState->tsState = *pState;
-}
-
-void SwrSetHsFunc(HANDLE hContext, PFN_HS_FUNC pfnFunc)
-{
- API_STATE* pApiState = GetDrawState(GetContext(hContext));
- pApiState->pfnHsFunc = pfnFunc;
-}
-
-void SwrSetDsFunc(HANDLE hContext, PFN_DS_FUNC pfnFunc)
-{
- API_STATE* pApiState = GetDrawState(GetContext(hContext));
- pApiState->pfnDsFunc = pfnFunc;
-}
-
-void SwrSetDepthStencilState(HANDLE hContext, SWR_DEPTH_STENCIL_STATE* pDSState)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
-
- pState->depthStencilState = *pDSState;
-}
-
-void SwrSetBackendState(HANDLE hContext, SWR_BACKEND_STATE* pBEState)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
-
- pState->backendState = *pBEState;
-}
-
-void SwrSetDepthBoundsState(HANDLE hContext, SWR_DEPTH_BOUNDS_STATE* pDBState)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
-
- pState->depthBoundsState = *pDBState;
-}
-
-void SwrSetPixelShaderState(HANDLE hContext, SWR_PS_STATE* pPSState)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
- pState->psState = *pPSState;
-}
-
-void SwrSetBlendState(HANDLE hContext, SWR_BLEND_STATE* pBlendState)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
- memcpy(&pState->blendState, pBlendState, sizeof(SWR_BLEND_STATE));
-}
-
-void SwrSetBlendFunc(HANDLE hContext, uint32_t renderTarget, PFN_BLEND_JIT_FUNC pfnBlendFunc)
-{
- SWR_ASSERT(renderTarget < SWR_NUM_RENDERTARGETS);
- API_STATE* pState = GetDrawState(GetContext(hContext));
- pState->pfnBlendFunc[renderTarget] = pfnBlendFunc;
-}
-
-// update guardband multipliers for the viewport
-void updateGuardbands(API_STATE* pState)
-{
- uint32_t numGbs = pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
-
- for (uint32_t i = 0; i < numGbs; ++i)
- {
- // guardband center is viewport center
- pState->gbState.left[i] = KNOB_GUARDBAND_WIDTH / pState->vp[i].width;
- pState->gbState.right[i] = KNOB_GUARDBAND_WIDTH / pState->vp[i].width;
- pState->gbState.top[i] = KNOB_GUARDBAND_HEIGHT / pState->vp[i].height;
- pState->gbState.bottom[i] = KNOB_GUARDBAND_HEIGHT / pState->vp[i].height;
- }
-}
-
-void SwrSetRastState(HANDLE hContext, const SWR_RASTSTATE* pRastState)
-{
- SWR_CONTEXT* pContext = GetContext(hContext);
- API_STATE* pState = GetDrawState(pContext);
-
- memcpy((void*)&pState->rastState, (void*)pRastState, sizeof(SWR_RASTSTATE));
-}
-
-void SwrSetViewports(HANDLE hContext,
- uint32_t numViewports,
- const SWR_VIEWPORT* pViewports,
- const SWR_VIEWPORT_MATRICES* pMatrices)
-{
- SWR_ASSERT(numViewports <= KNOB_NUM_VIEWPORTS_SCISSORS, "Invalid number of viewports.");
-
- SWR_CONTEXT* pContext = GetContext(hContext);
- API_STATE* pState = GetDrawState(pContext);
-
- memcpy(&pState->vp[0], pViewports, sizeof(SWR_VIEWPORT) * numViewports);
- // @todo Faster to copy portions of the SOA or just copy all of it?
- memcpy(&pState->vpMatrices, pMatrices, sizeof(SWR_VIEWPORT_MATRICES));
-}
-
-void SwrSetScissorRects(HANDLE hContext, uint32_t numScissors, const SWR_RECT* pScissors)
-{
- SWR_ASSERT(numScissors <= KNOB_NUM_VIEWPORTS_SCISSORS, "Invalid number of scissor rects.");
-
- API_STATE* pState = GetDrawState(GetContext(hContext));
- memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(pScissors[0]));
-};
-
-void SetupMacroTileScissors(DRAW_CONTEXT* pDC)
-{
- API_STATE* pState = &pDC->pState->state;
- uint32_t numScissors =
- pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
- pState->scissorsTileAligned = true;
-
- for (uint32_t index = 0; index < numScissors; ++index)
- {
- SWR_RECT& scissorInFixedPoint = pState->scissorsInFixedPoint[index];
-
- // Set up scissor dimensions based on scissor or viewport
- if (pState->rastState.scissorEnable)
- {
- scissorInFixedPoint = pState->scissorRects[index];
- }
- else
- {
- // the vp width and height must be added to origin un-rounded then the result round to
- // -inf. The cast to int works for rounding assuming all [left, right, top, bottom] are
- // positive.
- scissorInFixedPoint.xmin = (int32_t)pState->vp[index].x;
- scissorInFixedPoint.xmax = (int32_t)(pState->vp[index].x + pState->vp[index].width);
- scissorInFixedPoint.ymin = (int32_t)pState->vp[index].y;
- scissorInFixedPoint.ymax = (int32_t)(pState->vp[index].y + pState->vp[index].height);
- }
-
- // Clamp to max rect
- scissorInFixedPoint &= g_MaxScissorRect;
-
- // Test for tile alignment
- bool tileAligned;
- tileAligned = (scissorInFixedPoint.xmin % KNOB_TILE_X_DIM) == 0;
- tileAligned &= (scissorInFixedPoint.ymin % KNOB_TILE_Y_DIM) == 0;
- tileAligned &= (scissorInFixedPoint.xmax % KNOB_TILE_X_DIM) == 0;
- tileAligned &= (scissorInFixedPoint.ymax % KNOB_TILE_Y_DIM) == 0;
-
- pState->scissorsTileAligned &= tileAligned;
-
- // Scale to fixed point
- scissorInFixedPoint.xmin *= FIXED_POINT_SCALE;
- scissorInFixedPoint.xmax *= FIXED_POINT_SCALE;
- scissorInFixedPoint.ymin *= FIXED_POINT_SCALE;
- scissorInFixedPoint.ymax *= FIXED_POINT_SCALE;
-
- // Make scissor inclusive
- scissorInFixedPoint.xmax -= 1;
- scissorInFixedPoint.ymax -= 1;
- }
-}
-
-
-// templated backend function tables
-
-void SetupPipeline(DRAW_CONTEXT* pDC)
-{
- DRAW_STATE* pState = pDC->pState;
- const SWR_RASTSTATE& rastState = pState->state.rastState;
- const SWR_PS_STATE& psState = pState->state.psState;
- BACKEND_FUNCS& backendFuncs = pState->backendFuncs;
-
- // setup backend
- if (psState.pfnPixelShader == nullptr)
- {
- backendFuncs.pfnBackend = gBackendNullPs[pState->state.rastState.sampleCount];
- }
- else
- {
- const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0;
- const bool bMultisampleEnable =
- ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || forcedSampleCount) ? 1 : 0;
- const uint32_t centroid =
- ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
- const uint32_t canEarlyZ =
- (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesUAV)) ? 1 : 0;
- SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
-
- // select backend function
- switch (psState.shadingRate)
- {
- case SWR_SHADING_RATE_PIXEL:
- if (bMultisampleEnable)
- {
- // always need to generate I & J per sample for Z interpolation
- barycentricsMask =
- (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
- backendFuncs.pfnBackend =
- gBackendPixelRateTable[rastState.sampleCount][rastState.bIsCenterPattern]
- [psState.inputCoverage][centroid][forcedSampleCount]
- [canEarlyZ]
- ;
- }
- else
- {
- // always need to generate I & J per pixel for Z interpolation
- barycentricsMask =
- (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_PIXEL_MASK);
- backendFuncs.pfnBackend =
- gBackendSingleSample[psState.inputCoverage][centroid][canEarlyZ];
- }
- break;
- case SWR_SHADING_RATE_SAMPLE:
- SWR_ASSERT(rastState.bIsCenterPattern != true);
- // always need to generate I & J per sample for Z interpolation
- barycentricsMask =
- (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
- backendFuncs.pfnBackend =
- gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid]
- [canEarlyZ];
- break;
- default:
- SWR_ASSERT(0 && "Invalid shading rate");
- break;
- }
- }
-
- SWR_ASSERT(backendFuncs.pfnBackend);
-
- PFN_PROCESS_PRIMS pfnBinner;
-#if USE_SIMD16_FRONTEND
- PFN_PROCESS_PRIMS_SIMD16 pfnBinner_simd16;
-#endif
- switch (pState->state.topology)
- {
- case TOP_POINT_LIST:
- pState->pfnProcessPrims = ClipPoints;
- pfnBinner = BinPoints;
-#if USE_SIMD16_FRONTEND
- pState->pfnProcessPrims_simd16 = ClipPoints_simd16;
- pfnBinner_simd16 = BinPoints_simd16;
-#endif
- break;
- case TOP_LINE_LIST:
- case TOP_LINE_STRIP:
- case TOP_LINE_LOOP:
- case TOP_LINE_LIST_ADJ:
- case TOP_LISTSTRIP_ADJ:
- pState->pfnProcessPrims = ClipLines;
- pfnBinner = BinLines;
-#if USE_SIMD16_FRONTEND
- pState->pfnProcessPrims_simd16 = ClipLines_simd16;
- pfnBinner_simd16 = BinLines_simd16;
-#endif
- break;
- default:
- pState->pfnProcessPrims = ClipTriangles;
- pfnBinner = GetBinTrianglesFunc((rastState.conservativeRast > 0));
-#if USE_SIMD16_FRONTEND
- pState->pfnProcessPrims_simd16 = ClipTriangles_simd16;
- pfnBinner_simd16 = GetBinTrianglesFunc_simd16((rastState.conservativeRast > 0));
-#endif
- break;
- };
-
-
- // Disable clipper if viewport transform is disabled or if clipper is disabled
- if (pState->state.frontendState.vpTransformDisable || !pState->state.rastState.clipEnable)
- {
- pState->pfnProcessPrims = pfnBinner;
-#if USE_SIMD16_FRONTEND
- pState->pfnProcessPrims_simd16 = pfnBinner_simd16;
-#endif
- }
-
- // Disable rasterizer and backend if no pixel, no depth/stencil, and no attributes
- if ((pState->state.psState.pfnPixelShader == nullptr) &&
- (pState->state.depthStencilState.depthTestEnable == FALSE) &&
- (pState->state.depthStencilState.depthWriteEnable == FALSE) &&
- (pState->state.depthStencilState.stencilTestEnable == FALSE) &&
- (pState->state.depthStencilState.stencilWriteEnable == FALSE) &&
- (pState->state.backendState.numAttributes == 0))
- {
- pState->pfnProcessPrims = nullptr;
-#if USE_SIMD16_FRONTEND
- pState->pfnProcessPrims_simd16 = nullptr;
-#endif
- }
-
- if (pState->state.soState.rasterizerDisable == true)
- {
- pState->pfnProcessPrims = nullptr;
-#if USE_SIMD16_FRONTEND
- pState->pfnProcessPrims_simd16 = nullptr;
-#endif
- }
-
-
- // set up the frontend attribute count
- pState->state.feNumAttributes = 0;
- const SWR_BACKEND_STATE& backendState = pState->state.backendState;
- if (backendState.swizzleEnable)
- {
- // attribute swizzling is enabled, iterate over the map and record the max attribute used
- for (uint32_t i = 0; i < backendState.numAttributes; ++i)
- {
- pState->state.feNumAttributes =
- std::max(pState->state.feNumAttributes,
- (uint32_t)backendState.swizzleMap[i].sourceAttrib + 1);
- }
- }
- else
- {
- pState->state.feNumAttributes = pState->state.backendState.numAttributes;
- }
-
- if (pState->state.soState.soEnable)
- {
- uint64_t streamMasks = 0;
- for (uint32_t i = 0; i < 4; ++i)
- {
- streamMasks |= pState->state.soState.streamMasks[i];
- }
-
- unsigned long maxAttrib;
- if (_BitScanReverse64(&maxAttrib, streamMasks))
- {
- pState->state.feNumAttributes =
- std::max(pState->state.feNumAttributes, (uint32_t)(maxAttrib + 1));
- }
- }
-
- // complicated logic to test for cases where we don't need backing hottile memory for a draw
- // have to check for the special case where depth/stencil test is enabled but depthwrite is
- // disabled.
- pState->state.depthHottileEnable =
- ((!(pState->state.depthStencilState.depthTestEnable &&
- !pState->state.depthStencilState.depthWriteEnable &&
- !pState->state.depthBoundsState.depthBoundsTestEnable &&
- pState->state.depthStencilState.depthTestFunc == ZFUNC_ALWAYS)) &&
- (pState->state.depthStencilState.depthTestEnable ||
- pState->state.depthStencilState.depthWriteEnable ||
- pState->state.depthBoundsState.depthBoundsTestEnable))
- ? true
- : false;
-
- pState->state.stencilHottileEnable =
- (((!(pState->state.depthStencilState.stencilTestEnable &&
- !pState->state.depthStencilState.stencilWriteEnable &&
- pState->state.depthStencilState.stencilTestFunc == ZFUNC_ALWAYS)) ||
- // for stencil we have to check the double sided state as well
- (!(pState->state.depthStencilState.doubleSidedStencilTestEnable &&
- !pState->state.depthStencilState.stencilWriteEnable &&
- pState->state.depthStencilState.backfaceStencilTestFunc == ZFUNC_ALWAYS))) &&
- (pState->state.depthStencilState.stencilTestEnable ||
- pState->state.depthStencilState.stencilWriteEnable))
- ? true
- : false;
-
- uint32_t hotTileEnable = pState->state.psState.renderTargetMask;
-
- // Disable hottile for surfaces with no writes
- if (psState.pfnPixelShader != nullptr)
- {
- unsigned long rt;
- uint32_t rtMask = pState->state.psState.renderTargetMask;
- while (_BitScanForward(&rt, rtMask))
- {
- rtMask &= ~(1 << rt);
-
- if (pState->state.blendState.renderTarget[rt].writeDisableAlpha &&
- pState->state.blendState.renderTarget[rt].writeDisableRed &&
- pState->state.blendState.renderTarget[rt].writeDisableGreen &&
- pState->state.blendState.renderTarget[rt].writeDisableBlue)
- {
- hotTileEnable &= ~(1 << rt);
- }
- }
- }
-
- pState->state.colorHottileEnable = hotTileEnable;
-
- // Setup depth quantization function
- if (pState->state.depthHottileEnable)
- {
- switch (pState->state.rastState.depthFormat)
- {
- case R32_FLOAT_X8X24_TYPELESS:
- pState->state.pfnQuantizeDepth = QuantizeDepth<R32_FLOAT_X8X24_TYPELESS>;
- break;
- case R32_FLOAT:
- pState->state.pfnQuantizeDepth = QuantizeDepth<R32_FLOAT>;
- break;
- case R24_UNORM_X8_TYPELESS:
- pState->state.pfnQuantizeDepth = QuantizeDepth<R24_UNORM_X8_TYPELESS>;
- break;
- case R16_UNORM:
- pState->state.pfnQuantizeDepth = QuantizeDepth<R16_UNORM>;
- break;
- default:
- SWR_INVALID("Unsupported depth format for depth quantization.");
- pState->state.pfnQuantizeDepth = QuantizeDepth<R32_FLOAT>;
- }
- }
- else
- {
- // set up pass-through quantize if depth isn't enabled
- pState->state.pfnQuantizeDepth = QuantizeDepth<R32_FLOAT>;
- }
-
- // Generate guardbands
- updateGuardbands(&pState->state);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief InitDraw - Sets up macrotile scissors and pipeline state, unless isSplitDraw is true.
-/// @param pDC - Draw context to initialize for this draw.
-void InitDraw(DRAW_CONTEXT* pDC, bool isSplitDraw)
-{
- // We don't need to re-setup the scissors/pipeline state again for split draw.
- if (isSplitDraw == false)
- {
- SetupMacroTileScissors(pDC);
- SetupPipeline(pDC);
- }
-
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Returns the per-draw vertex limit for pDC's current state; totalVerts means "no split".
-/// @param totalVerts - Total vertices for draw
-/// @param topology - Topology used for draw
-uint32_t MaxVertsPerDraw(DRAW_CONTEXT* pDC, uint32_t totalVerts, PRIMITIVE_TOPOLOGY topology)
-{
- API_STATE& state = pDC->pState->state;
-
- // We can not split draws that have streamout enabled because there is no practical way
- // to support multiple threads generating SO data for a single set of buffers.
- if (state.soState.soEnable)
- {
- return totalVerts;
- }
-
- // The Primitive Assembly code can only handle 1 RECT at a time. Specified with only 3 verts.
- if (topology == TOP_RECT_LIST)
- {
- return 3;
- }
-
- // Is split drawing disabled?
- if (KNOB_DISABLE_SPLIT_DRAW)
- {
- return totalVerts;
- }
-
- uint32_t vertsPerDraw = totalVerts;
-
- switch (topology)
- {
- case TOP_POINT_LIST:
- case TOP_TRIANGLE_LIST:
- vertsPerDraw = KNOB_MAX_PRIMS_PER_DRAW; // NOTE(review): prim-count knob used as a vertex count - confirm
- break;
-
- case TOP_PATCHLIST_1:
- case TOP_PATCHLIST_2:
- case TOP_PATCHLIST_3:
- case TOP_PATCHLIST_4:
- case TOP_PATCHLIST_5:
- case TOP_PATCHLIST_6:
- case TOP_PATCHLIST_7:
- case TOP_PATCHLIST_8:
- case TOP_PATCHLIST_9:
- case TOP_PATCHLIST_10:
- case TOP_PATCHLIST_11:
- case TOP_PATCHLIST_12:
- case TOP_PATCHLIST_13:
- case TOP_PATCHLIST_14:
- case TOP_PATCHLIST_15:
- case TOP_PATCHLIST_16:
- case TOP_PATCHLIST_17:
- case TOP_PATCHLIST_18:
- case TOP_PATCHLIST_19:
- case TOP_PATCHLIST_20:
- case TOP_PATCHLIST_21:
- case TOP_PATCHLIST_22:
- case TOP_PATCHLIST_23:
- case TOP_PATCHLIST_24:
- case TOP_PATCHLIST_25:
- case TOP_PATCHLIST_26:
- case TOP_PATCHLIST_27:
- case TOP_PATCHLIST_28:
- case TOP_PATCHLIST_29:
- case TOP_PATCHLIST_30:
- case TOP_PATCHLIST_31:
- case TOP_PATCHLIST_32:
- if (pDC->pState->state.tsState.tsEnable) // patch lists are only split when tessellation is enabled
- {
- uint32_t vertsPerPrim = topology - TOP_PATCHLIST_BASE; // assumes TOP_PATCHLIST_N == BASE + N - TODO confirm
- vertsPerDraw = vertsPerPrim * KNOB_MAX_TESS_PRIMS_PER_DRAW;
- }
- break;
- default:
- // We are not splitting up draws for other topologies.
- break;
- }
-
- return vertsPerDraw;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief DrawInstanced - Queues a non-indexed draw, in chunks of at most MaxVertsPerDraw vertices.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param topology - Specifies topology for draw.
-/// @param numVertices - How many vertices to read sequentially from vertex data (per instance).
-/// @param startVertex - Specifies start vertex for draw. (vertex data)
-/// @param numInstances - How many instances to render.
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer
-/// (instanced data)
-void DrawInstanced(HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numVertices,
- uint32_t startVertex,
- uint32_t numInstances = 1,
- uint32_t startInstance = 0)
-{
- if (KNOB_TOSS_DRAW)
- {
- return;
- }
-
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-
- RDTSC_BEGIN(pContext->pBucketMgr, APIDraw, pDC->drawId);
-
- uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
- uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw);
- uint32_t remainingVerts = numVertices;
-
- API_STATE* pState = &pDC->pState->state;
- pState->topology = topology;
- pState->forceFront = false;
-
- // disable culling for point and rect lists (point lists also set forceFront)
- uint32_t oldCullMode = pState->rastState.cullMode;
- if (topology == TOP_POINT_LIST)
- {
- pState->rastState.cullMode = SWR_CULLMODE_NONE;
- pState->forceFront = true;
- }
- else if (topology == TOP_RECT_LIST)
- {
- pState->rastState.cullMode = SWR_CULLMODE_NONE;
- }
-
- int draw = 0;
- while (remainingVerts)
- {
- uint32_t numVertsForDraw =
- (remainingVerts < maxVertsPerDraw) ? remainingVerts : maxVertsPerDraw;
-
- bool isSplitDraw = (draw > 0) ? !KNOB_DISABLE_SPLIT_DRAW : false; // chunks after the first are split draws
- DRAW_CONTEXT* pDC = GetDrawContext(pContext, isSplitDraw); // note: shadows the outer pDC
- InitDraw(pDC, isSplitDraw);
-
- pDC->FeWork.type = DRAW;
- pDC->FeWork.pfnWork = GetProcessDrawFunc(false, // IsIndexed
- false, // bEnableCutIndex
- pState->tsState.tsEnable,
- pState->gsState.gsEnable,
- pState->soState.soEnable,
- pDC->pState->pfnProcessPrims != nullptr);
- pDC->FeWork.desc.draw.numVerts = numVertsForDraw;
- pDC->FeWork.desc.draw.startVertex = startVertex;
- pDC->FeWork.desc.draw.numInstances = numInstances;
- pDC->FeWork.desc.draw.startInstance = startInstance;
- pDC->FeWork.desc.draw.startPrimID = draw * primsPerDraw;
- pDC->FeWork.desc.draw.startVertexID = draw * maxVertsPerDraw;
-
- pDC->cleanupState = (remainingVerts == numVertsForDraw); // true only for the final chunk
-
- // enqueue DC
- QueueDraw(pContext);
-
- AR_API_EVENT(DrawInstancedEvent(pDC->drawId,
- topology,
- numVertsForDraw,
- startVertex,
- numInstances,
- startInstance,
- pState->tsState.tsEnable,
- pState->gsState.gsEnable,
- pState->soState.soEnable,
- pState->gsState.outputTopology,
- draw));
-
- remainingVerts -= numVertsForDraw;
- draw++;
- }
-
- // restore the cull mode saved above on the current draw context's state
- pDC = GetDrawContext(pContext);
- pDC->pState->state.rastState.cullMode = oldCullMode;
-
- RDTSC_END(pContext->pBucketMgr, APIDraw, numVertices * numInstances);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrDraw - Non-indexed draw; forwards to DrawInstanced with its default instance arguments.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param topology - Specifies topology for draw.
-/// @param startVertex - Specifies start vertex in vertex buffer for draw.
-/// @param numVertices - Number of vertices.
-void SwrDraw(HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t startVertex,
- uint32_t numVertices)
-{
- DrawInstanced(hContext, topology, numVertices, startVertex);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrDrawInstanced - Forwards to DrawInstanced (which takes startVertex before numInstances).
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param topology - Specifies topology for draw.
-/// @param numVertsPerInstance - How many vertices to read sequentially from vertex data.
-/// @param numInstances - How many instances to render.
-/// @param startVertex - Specifies start vertex for draw. (vertex data)
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer
-/// (instanced data)
-void SwrDrawInstanced(HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numVertsPerInstance,
- uint32_t numInstances,
- uint32_t startVertex,
- uint32_t startInstance)
-{
- DrawInstanced(
- hContext, topology, numVertsPerInstance, startVertex, numInstances, startInstance);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief DrawIndexedInstance - Queues an indexed draw, in chunks of at most MaxVertsPerDraw indices.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param topology - Specifies topology for draw.
-/// @param numIndices - Number of indices to read sequentially from index buffer.
-/// @param indexOffset - Starting index into index buffer.
-/// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
-/// @param numInstances - Number of instances to render.
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer
-/// (instanced data)
-void DrawIndexedInstance(HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numIndices,
- uint32_t indexOffset,
- int32_t baseVertex,
- uint32_t numInstances = 1,
- uint32_t startInstance = 0)
-{
- if (KNOB_TOSS_DRAW)
- {
- return;
- }
-
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- API_STATE* pState = &pDC->pState->state;
-
- RDTSC_BEGIN(pContext->pBucketMgr, APIDrawIndexed, pDC->drawId);
-
- uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology);
- uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw);
- uint32_t remainingIndices = numIndices;
-
- uint32_t indexSize = 0;
- switch (pState->indexBuffer.format)
- {
- case R32_UINT:
- indexSize = sizeof(uint32_t);
- break;
- case R16_UINT:
- indexSize = sizeof(uint16_t);
- break;
- case R8_UINT:
- indexSize = sizeof(uint8_t);
- break;
- default:
- SWR_INVALID("Invalid index buffer format: %d", pState->indexBuffer.format); // indexSize stays 0
- }
-
- int draw = 0;
- gfxptr_t xpIB = pState->indexBuffer.xpIndices;
- xpIB += (uint64_t)indexOffset * (uint64_t)indexSize; // skip indexOffset indices of indexSize bytes each
-
- pState->topology = topology;
- pState->forceFront = false;
-
- // disable culling for point and rect lists (point lists also set forceFront)
- uint32_t oldCullMode = pState->rastState.cullMode;
- if (topology == TOP_POINT_LIST)
- {
- pState->rastState.cullMode = SWR_CULLMODE_NONE;
- pState->forceFront = true;
- }
- else if (topology == TOP_RECT_LIST)
- {
- pState->rastState.cullMode = SWR_CULLMODE_NONE;
- }
-
- while (remainingIndices)
- {
- uint32_t numIndicesForDraw =
- (remainingIndices < maxIndicesPerDraw) ? remainingIndices : maxIndicesPerDraw;
-
- // When breaking up draw, we need to obtain new draw context for each iteration.
- bool isSplitDraw = (draw > 0) ? !KNOB_DISABLE_SPLIT_DRAW : false;
-
- pDC = GetDrawContext(pContext, isSplitDraw);
- InitDraw(pDC, isSplitDraw);
-
- pDC->FeWork.type = DRAW;
- pDC->FeWork.pfnWork = GetProcessDrawFunc(true, // IsIndexed
- pState->frontendState.bEnableCutIndex,
- pState->tsState.tsEnable,
- pState->gsState.gsEnable,
- pState->soState.soEnable,
- pDC->pState->pfnProcessPrims != nullptr);
- pDC->FeWork.desc.draw.pDC = pDC;
- pDC->FeWork.desc.draw.numIndices = numIndicesForDraw;
- pDC->FeWork.desc.draw.xpIB = xpIB;
- pDC->FeWork.desc.draw.type = pDC->pState->state.indexBuffer.format;
-
- pDC->FeWork.desc.draw.numInstances = numInstances;
- pDC->FeWork.desc.draw.startInstance = startInstance;
- pDC->FeWork.desc.draw.baseVertex = baseVertex;
- pDC->FeWork.desc.draw.startPrimID = draw * primsPerDraw;
-
- pDC->cleanupState = (remainingIndices == numIndicesForDraw); // true only for the final chunk
-
- // enqueue DC
- QueueDraw(pContext);
-
- AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId,
- topology,
- numIndicesForDraw,
- indexOffset,
- baseVertex,
- numInstances,
- startInstance,
- pState->tsState.tsEnable,
- pState->gsState.gsEnable,
- pState->soState.soEnable,
- pState->gsState.outputTopology,
- draw));
-
- xpIB += maxIndicesPerDraw * indexSize; // next chunk; NOTE(review): 32-bit multiply, unlike the 64-bit one above
- remainingIndices -= numIndicesForDraw;
- draw++;
- }
-
- // Restore the cull mode saved above on the current draw context's state
- pDC = GetDrawContext(pContext);
- pDC->pState->state.rastState.cullMode = oldCullMode;
-
- RDTSC_END(pContext->pBucketMgr, APIDrawIndexed, numIndices * numInstances);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrDrawIndexed - Forwards to DrawIndexedInstance with its default instance arguments.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param topology - Specifies topology for draw.
-/// @param numIndices - Number of indices to read sequentially from index buffer.
-/// @param indexOffset - Starting index into index buffer.
-/// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
-void SwrDrawIndexed(HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numIndices,
- uint32_t indexOffset,
- int32_t baseVertex)
-{
- DrawIndexedInstance(hContext, topology, numIndices, indexOffset, baseVertex);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrDrawIndexedInstanced - Forwards to DrawIndexedInstance (note its different argument order).
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param topology - Specifies topology for draw.
-/// @param numIndices - Number of indices to read sequentially from index buffer.
-/// @param numInstances - Number of instances to render.
-/// @param indexOffset - Starting index into index buffer.
-/// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer
-/// (instanced data)
-void SwrDrawIndexedInstanced(HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numIndices,
- uint32_t numInstances,
- uint32_t indexOffset,
- int32_t baseVertex,
- uint32_t startInstance)
-{
- DrawIndexedInstance(
- hContext, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrInvalidateTiles - Queues work that sets hottiles in a rectangle to SWR_TILE_INVALID.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to
-/// invalidate.
-/// @param invalidateRect - The pixel-coordinate rectangle to invalidate. This will be expanded to
-/// be hottile size-aligned.
-void SWR_API SwrInvalidateTiles(HANDLE hContext,
- uint32_t attachmentMask,
- const SWR_RECT& invalidateRect)
-{
- if (KNOB_TOSS_DRAW)
- {
- return;
- }
-
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-
- pDC->FeWork.type = DISCARDINVALIDATETILES;
- pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
- pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
- pDC->FeWork.desc.discardInvalidateTiles.rect = invalidateRect;
- pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect; // NOTE(review): presumably clamps to the max scissor rect - confirm operator&=
- pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_INVALID;
- pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = false;
- pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = false;
-
- // enqueue
- QueueDraw(pContext);
-
- AR_API_EVENT(SwrInvalidateTilesEvent(pDC->drawId));
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrDiscardRect - Queues work that discards the fully covered hottiles in a rectangle.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
-/// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be
-/// discarded.
-void SWR_API SwrDiscardRect(HANDLE hContext, uint32_t attachmentMask, const SWR_RECT& rect)
-{
- if (KNOB_TOSS_DRAW)
- {
- return;
- }
-
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-
- // Queue a discard: same work item as SwrInvalidateTiles, but full tiles only, set to RESOLVED
- pDC->FeWork.type = DISCARDINVALIDATETILES;
- pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
- pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
- pDC->FeWork.desc.discardInvalidateTiles.rect = rect;
- pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect; // NOTE(review): presumably clamps to the max scissor rect - confirm operator&=
- pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_RESOLVED;
- pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = true;
- pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = true;
-
- // enqueue
- QueueDraw(pContext);
-
- AR_API_EVENT(SwrDiscardRectEvent(pDC->drawId));
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrDispatch - Queues a compute dispatch of X * Y * Z thread groups.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param threadGroupCountX - Number of thread groups dispatched in X direction
-/// @param threadGroupCountY - Number of thread groups dispatched in Y direction
-/// @param threadGroupCountZ - Number of thread groups dispatched in Z direction
-void SwrDispatch(HANDLE hContext,
- uint32_t threadGroupCountX,
- uint32_t threadGroupCountY,
- uint32_t threadGroupCountZ
-
-)
-{
- if (KNOB_TOSS_DRAW)
- {
- return;
- }
-
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-
- RDTSC_BEGIN(pContext->pBucketMgr, APIDispatch, pDC->drawId);
- AR_API_EVENT(
- DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ));
- pDC->isCompute = true; // This is a compute context.
-
- COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pArena->AllocAligned(sizeof(COMPUTE_DESC), 64); // NOTE(review): used below without a null check
-
- pTaskData->threadGroupCountX = threadGroupCountX;
- pTaskData->threadGroupCountY = threadGroupCountY;
- pTaskData->threadGroupCountZ = threadGroupCountZ;
-
- pTaskData->enableThreadDispatch = false;
-
- uint32_t totalThreadGroups = threadGroupCountX * threadGroupCountY * threadGroupCountZ; // NOTE(review): 32-bit product, wraps for very large counts
- uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT; // slot in pDispatchQueueArray for this draw id
- pDC->pDispatch = &pContext->pDispatchQueueArray[dcIndex];
- pDC->pDispatch->initialize(totalThreadGroups, pTaskData, &ProcessComputeBE);
-
- QueueDispatch(pContext);
- RDTSC_END(pContext->pBucketMgr,
- APIDispatch,
- threadGroupCountX * threadGroupCountY * threadGroupCountZ);
-}
-
-// Queues a STORETILES work item: deswizzles, converts and stores the current contents of the
-// hot tiles selected by attachmentMask within storeRect (NOTE(review): no pState parameter exists here)
-void SWR_API SwrStoreTiles(HANDLE hContext,
- uint32_t attachmentMask,
- SWR_TILE_STATE postStoreTileState,
- const SWR_RECT& storeRect)
-{
- if (KNOB_TOSS_DRAW)
- {
- return;
- }
-
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-
- RDTSC_BEGIN(pContext->pBucketMgr, APIStoreTiles, pDC->drawId);
-
- pDC->FeWork.type = STORETILES;
- pDC->FeWork.pfnWork = ProcessStoreTiles;
- pDC->FeWork.desc.storeTiles.attachmentMask = attachmentMask;
- pDC->FeWork.desc.storeTiles.postStoreTileState = postStoreTileState;
- pDC->FeWork.desc.storeTiles.rect = storeRect;
- pDC->FeWork.desc.storeTiles.rect &= g_MaxScissorRect; // NOTE(review): presumably clamps to the max scissor rect - confirm operator&=
-
- // enqueue
- QueueDraw(pContext);
-
- AR_API_EVENT(SwrStoreTilesEvent(pDC->drawId));
-
- RDTSC_END(pContext->pBucketMgr, APIStoreTiles, 1);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrClearRenderTarget - Queues a clear of attached render targets / depth / stencil
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param attachmentMask - combination of SWR_ATTACHMENT_*_BIT attachments to clear
-/// @param renderTargetArrayIndex - the RT array index to clear
-/// @param clearColor - color used for clearing render targets (four components are read)
-/// @param z - depth value used for clearing depth buffer
-/// @param stencil - stencil value used for clearing stencil buffer
-/// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
-void SWR_API SwrClearRenderTarget(HANDLE hContext,
- uint32_t attachmentMask,
- uint32_t renderTargetArrayIndex,
- const float clearColor[4],
- float z,
- uint8_t stencil,
- const SWR_RECT& clearRect)
-{
- if (KNOB_TOSS_DRAW)
- {
- return;
- }
-
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-
- RDTSC_BEGIN(pContext->pBucketMgr, APIClearRenderTarget, pDC->drawId);
-
- pDC->FeWork.type = CLEAR;
- pDC->FeWork.pfnWork = ProcessClear;
- pDC->FeWork.desc.clear.rect = clearRect;
- pDC->FeWork.desc.clear.rect &= g_MaxScissorRect; // NOTE(review): presumably clamps to the max scissor rect - confirm operator&=
- pDC->FeWork.desc.clear.attachmentMask = attachmentMask;
- pDC->FeWork.desc.clear.renderTargetArrayIndex = renderTargetArrayIndex;
- pDC->FeWork.desc.clear.clearDepth = z;
- pDC->FeWork.desc.clear.clearRTColor[0] = clearColor[0];
- pDC->FeWork.desc.clear.clearRTColor[1] = clearColor[1];
- pDC->FeWork.desc.clear.clearRTColor[2] = clearColor[2];
- pDC->FeWork.desc.clear.clearRTColor[3] = clearColor[3];
- pDC->FeWork.desc.clear.clearStencil = stencil;
-
- // enqueue draw
- QueueDraw(pContext);
-
- RDTSC_END(pContext->pBucketMgr, APIClearRenderTarget, 1);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Returns a pointer to the private context state for the current
-/// draw operation. This is used for external components such as the
-/// sampler.
-/// SWR is responsible for the allocation of the private context state.
-/// @param hContext - Handle passed back from SwrCreateContext
-VOID* SwrGetPrivateContextState(HANDLE hContext)
-{
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- DRAW_STATE* pState = pDC->pState;
-
- if (pState->pPrivateState == nullptr) // allocate on first request from the draw state's arena
- {
- pState->pPrivateState = pState->pArena->AllocAligned(pContext->privateStateSize,
- KNOB_SIMD_WIDTH * sizeof(float));
- }
-
- return pState->pPrivateState;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Clients can use this to allocate memory for draw/dispatch
-/// operations. The memory will automatically be freed once operation
-/// has completed. Client can use this to allocate binding tables,
-/// etc. needed for shader execution.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param size - Size of allocation
-/// @param align - Alignment needed for allocation.
-VOID* SwrAllocDrawContextMemory(HANDLE hContext, uint32_t size, uint32_t align)
-{
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-
- return pDC->pState->pArena->AllocAligned(size, align); // comes from the current draw state's arena
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Sets the enableStatsFE flag in the current draw state (FE presumably = frontend).
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param enable - If true then counts are incremented.
-void SwrEnableStatsFE(HANDLE hContext, bool enable)
-{
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-
- pDC->pState->state.enableStatsFE = enable;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Sets the enableStatsBE flag in the current draw state (BE presumably = backend).
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param enable - If true then counts are incremented.
-void SwrEnableStatsBE(HANDLE hContext, bool enable)
-{
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-
- pDC->pState->state.enableStatsBE = enable;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Mark end of frame - used for performance profiling; increments pContext->frameCount.
-/// @param hContext - Handle passed back from SwrCreateContext
-void SWR_API SwrEndFrame(HANDLE hContext)
-{
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- (void)pDC; // presumably avoids an unused-variable warning if AR_API_EVENT expands to nothing - confirm
-
- RDTSC_ENDFRAME(pContext->pBucketMgr);
- AR_API_EVENT(FrameEndEvent(pContext->frameCount, pDC->drawId));
-
- pContext->frameCount++;
-}
-
-void InitSimLoadTilesTable(); // NOTE(review): the three InitSim* declarations are not called by SwrInit
-void InitSimStoreTilesTable();
-void InitSimClearTilesTable();
-
-void InitClearTilesTable(); // defined outside this part of the file
-void InitBackendFuncTables(); // defined outside this part of the file
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Initialize swr backend and memory internal tables (clear-tile, backend and rasterizer)
-void SwrInit()
-{
- InitClearTilesTable();
- InitBackendFuncTables();
- InitRasterizerFunctions();
-}
-
-void SwrGetInterface(SWR_INTERFACE& out_funcs) // fills out_funcs with pointers to the Swr* API entry points
-{
- out_funcs.pfnSwrCreateContext = SwrCreateContext;
- out_funcs.pfnSwrDestroyContext = SwrDestroyContext;
- out_funcs.pfnSwrBindApiThread = SwrBindApiThread;
- out_funcs.pfnSwrSaveState = SwrSaveState;
- out_funcs.pfnSwrRestoreState = SwrRestoreState;
- out_funcs.pfnSwrSync = SwrSync;
- out_funcs.pfnSwrStallBE = SwrStallBE;
- out_funcs.pfnSwrWaitForIdle = SwrWaitForIdle;
- out_funcs.pfnSwrWaitForIdleFE = SwrWaitForIdleFE;
- out_funcs.pfnSwrSetVertexBuffers = SwrSetVertexBuffers;
- out_funcs.pfnSwrSetIndexBuffer = SwrSetIndexBuffer;
- out_funcs.pfnSwrSetFetchFunc = SwrSetFetchFunc;
- out_funcs.pfnSwrSetSoFunc = SwrSetSoFunc;
- out_funcs.pfnSwrSetSoState = SwrSetSoState;
- out_funcs.pfnSwrSetSoBuffers = SwrSetSoBuffers;
- out_funcs.pfnSwrSetVertexFunc = SwrSetVertexFunc;
- out_funcs.pfnSwrSetFrontendState = SwrSetFrontendState;
- out_funcs.pfnSwrSetGsState = SwrSetGsState;
- out_funcs.pfnSwrSetGsFunc = SwrSetGsFunc;
- out_funcs.pfnSwrSetCsFunc = SwrSetCsFunc;
- out_funcs.pfnSwrSetTsState = SwrSetTsState;
- out_funcs.pfnSwrSetHsFunc = SwrSetHsFunc;
- out_funcs.pfnSwrSetDsFunc = SwrSetDsFunc;
- out_funcs.pfnSwrSetDepthStencilState = SwrSetDepthStencilState;
- out_funcs.pfnSwrSetBackendState = SwrSetBackendState;
- out_funcs.pfnSwrSetDepthBoundsState = SwrSetDepthBoundsState;
- out_funcs.pfnSwrSetPixelShaderState = SwrSetPixelShaderState;
- out_funcs.pfnSwrSetBlendState = SwrSetBlendState;
- out_funcs.pfnSwrSetBlendFunc = SwrSetBlendFunc;
- out_funcs.pfnSwrDraw = SwrDraw;
- out_funcs.pfnSwrDrawInstanced = SwrDrawInstanced;
- out_funcs.pfnSwrDrawIndexed = SwrDrawIndexed;
- out_funcs.pfnSwrDrawIndexedInstanced = SwrDrawIndexedInstanced;
- out_funcs.pfnSwrInvalidateTiles = SwrInvalidateTiles;
- out_funcs.pfnSwrDiscardRect = SwrDiscardRect;
- out_funcs.pfnSwrDispatch = SwrDispatch;
- out_funcs.pfnSwrStoreTiles = SwrStoreTiles;
- out_funcs.pfnSwrClearRenderTarget = SwrClearRenderTarget;
- out_funcs.pfnSwrSetRastState = SwrSetRastState;
- out_funcs.pfnSwrSetViewports = SwrSetViewports;
- out_funcs.pfnSwrSetScissorRects = SwrSetScissorRects;
- out_funcs.pfnSwrGetPrivateContextState = SwrGetPrivateContextState;
- out_funcs.pfnSwrAllocDrawContextMemory = SwrAllocDrawContextMemory;
- out_funcs.pfnSwrEnableStatsFE = SwrEnableStatsFE;
- out_funcs.pfnSwrEnableStatsBE = SwrEnableStatsBE;
- out_funcs.pfnSwrEndFrame = SwrEndFrame;
- out_funcs.pfnSwrInit = SwrInit; // last assignment made here; any other SWR_INTERFACE members are left untouched
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h
deleted file mode 100644
index 79e33b01677..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ /dev/null
@@ -1,772 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file api.h
- *
- * @brief API definitions
- *
- ******************************************************************************/
-
-#ifndef __SWR_API_H__
-#define __SWR_API_H__
-
-#include "common/os.h"
-
-#include <assert.h>
-#include <algorithm>
-
-#include "common/intrin.h"
-#include "common/formats.h"
-#include "core/state.h"
-
-typedef void(SWR_API* PFN_CALLBACK_FUNC)(uint64_t data, uint64_t data2, uint64_t data3);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Rectangle structure
-struct SWR_RECT
-{
- int32_t xmin; ///< inclusive
- int32_t ymin; ///< inclusive
- int32_t xmax; ///< exclusive
- int32_t ymax; ///< exclusive
-
- bool operator==(const SWR_RECT& rhs)
- {
- return (this->ymin == rhs.ymin && this->ymax == rhs.ymax && this->xmin == rhs.xmin &&
- this->xmax == rhs.xmax);
- }
-
- bool operator!=(const SWR_RECT& rhs) { return !(*this == rhs); }
-
- SWR_RECT& Intersect(const SWR_RECT& other)
- {
- this->xmin = std::max(this->xmin, other.xmin);
- this->ymin = std::max(this->ymin, other.ymin);
- this->xmax = std::min(this->xmax, other.xmax);
- this->ymax = std::min(this->ymax, other.ymax);
-
- if (xmax - xmin < 0 || ymax - ymin < 0)
- {
- // Zero area
- ymin = ymax = xmin = xmax = 0;
- }
-
- return *this;
- }
- SWR_RECT& operator&=(const SWR_RECT& other) { return Intersect(other); }
-
- SWR_RECT& Union(const SWR_RECT& other)
- {
- this->xmin = std::min(this->xmin, other.xmin);
- this->ymin = std::min(this->ymin, other.ymin);
- this->xmax = std::max(this->xmax, other.xmax);
- this->ymax = std::max(this->ymax, other.ymax);
-
- return *this;
- }
-
- SWR_RECT& operator|=(const SWR_RECT& other) { return Union(other); }
-
- void Translate(int32_t x, int32_t y)
- {
- xmin += x;
- ymin += y;
- xmax += x;
- ymax += y;
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Function signature for load hot tiles
-/// @param hDC - handle to DRAW_CONTEXT
-/// @param dstFormat - format of the hot tile
-/// @param renderTargetIndex - render target to store, can be color, depth or stencil
-/// @param x - destination x coordinate
-/// @param y - destination y coordinate
-/// @param pDstHotTile - pointer to the hot tile surface
-typedef void(SWR_API* PFN_LOAD_TILE)(HANDLE hDC,
- HANDLE hWorkerPrivateData,
- SWR_FORMAT dstFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x,
- uint32_t y,
- uint32_t renderTargetArrayIndex,
- uint8_t* pDstHotTile);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Function signature for store hot tiles
-/// @param hDC - handle to DRAW_CONTEXT
-/// @param srcFormat - format of the hot tile
-/// @param renderTargetIndex - render target to store, can be color, depth or stencil
-/// @param x - destination x coordinate
-/// @param y - destination y coordinate
-/// @param pSrcHotTile - pointer to the hot tile surface
-typedef void(SWR_API* PFN_STORE_TILE)(HANDLE hDC,
- HANDLE hWorkerPrivateData,
- SWR_FORMAT srcFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x,
- uint32_t y,
- uint32_t renderTargetArrayIndex,
- uint8_t* pSrcHotTile);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Function signature for clearing from the hot tiles clear value
-/// @param hPrivateContext - handle to private data
-/// @param renderTargetIndex - render target to store, can be color, depth or stencil
-/// @param x - destination x coordinate
-/// @param y - destination y coordinate
-/// @param renderTargetArrayIndex - render target array offset from arrayIndex
-/// @param pClearColor - pointer to the hot tile's clear value
-typedef void(SWR_API* PFN_CLEAR_TILE)(HANDLE hPrivateContext,
- HANDLE hWorkerPrivateData,
- SWR_RENDERTARGET_ATTACHMENT rtIndex,
- uint32_t x,
- uint32_t y,
- uint32_t renderTargetArrayIndex,
- const float* pClearColor);
-
-typedef void*(SWR_API* PFN_TRANSLATE_GFXPTR_FOR_READ)(HANDLE hPrivateContext,
- gfxptr_t xpAddr,
- bool* pbNullTileAccessed,
- HANDLE hPrivateWorkerData);
-
-typedef void*(SWR_API* PFN_TRANSLATE_GFXPTR_FOR_WRITE)(HANDLE hPrivateContext,
- gfxptr_t xpAddr,
- bool* pbNullTileAccessed,
- HANDLE hPrivateWorkerData);
-
-typedef gfxptr_t(SWR_API* PFN_MAKE_GFXPTR)(HANDLE hPrivateContext, void* sysAddr);
-
-typedef HANDLE(SWR_API* PFN_CREATE_MEMORY_CONTEXT)(HANDLE hExternalMemory);
-
-typedef void(SWR_API* PFN_DESTROY_MEMORY_CONTEXT)(HANDLE hExternalMemory, HANDLE hMemoryContext);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Callback to allow driver to update their copy of streamout write offset.
-/// This is call is made for any draw operation that has streamout enabled
-/// and has updated the write offset.
-/// @param hPrivateContext - handle to private data
-/// @param soBufferSlot - buffer slot for write offset
-/// @param soWriteOffset - update value for so write offset.
-typedef void(SWR_API* PFN_UPDATE_SO_WRITE_OFFSET)(HANDLE hPrivateContext,
- uint32_t soBufferSlot,
- uint32_t soWriteOffset);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Callback to allow driver to update their copy of stats.
-/// @param hPrivateContext - handle to private data
-/// @param pStats - pointer to draw stats
-typedef void(SWR_API* PFN_UPDATE_STATS)(HANDLE hPrivateContext, const SWR_STATS* pStats);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Callback to allow driver to update their copy of FE stats.
-/// @note Its optimal to have a separate callback for FE stats since
-/// there is only one DC per FE thread. This means we do not have
-/// to sum up the stats across all of the workers.
-/// @param hPrivateContext - handle to private data
-/// @param pStats - pointer to draw stats
-typedef void(SWR_API* PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext, const SWR_STATS_FE* pStats);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Callback to allow driver to update StreamOut status
-/// @param hPrivateContext - handle to private data
-/// @param numPrims - number of primitives written to StreamOut buffer
-typedef void(SWR_API* PFN_UPDATE_STREAMOUT)(HANDLE hPrivateContext, uint64_t numPrims);
-
-//////////////////////////////////////////////////////////////////////////
-/// BucketManager
-/// Forward Declaration (see rdtsc_buckets.h for full definition)
-/////////////////////////////////////////////////////////////////////////
-class BucketManager;
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_THREADING_INFO
-/////////////////////////////////////////////////////////////////////////
-struct SWR_THREADING_INFO
-{
- uint32_t BASE_NUMA_NODE;
- uint32_t BASE_CORE;
- uint32_t BASE_THREAD;
- uint32_t MAX_WORKER_THREADS;
- uint32_t MAX_NUMA_NODES;
- uint32_t MAX_CORES_PER_NUMA_NODE;
- uint32_t MAX_THREADS_PER_CORE;
- bool SINGLE_THREADED;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_API_THREADING_INFO
-/// Data used to reserve HW threads for API use
-/// API Threads are reserved from numa nodes / cores used for
-/// SWR Worker threads. Specifying reserved threads here can reduce
-/// the total number of SWR worker threads.
-/////////////////////////////////////////////////////////////////////////
-struct SWR_API_THREADING_INFO
-{
- uint32_t numAPIReservedThreads; // Default is 1 if SWR_API_THREADING_INFO is not sent
- uint32_t bindAPIThread0; // Default is true if numAPIReservedThreads is > 0,
- // binds thread used in SwrCreateContext to API Reserved
- // thread 0
- uint32_t numAPIThreadsPerCore; // 0 - means use all threads per core, else clamp to this number.
- // Independent of KNOB_MAX_THREADS_PER_CORE.
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_CONTEXT
-/// Forward Declaration (see context.h for full definition)
-/////////////////////////////////////////////////////////////////////////
-struct SWR_CONTEXT;
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_WORKER_PRIVATE_STATE
-/// Data used to allocate per-worker thread private data. A pointer
-/// to this data will be passed in to each shader function.
-/// The first field of this private data must be SWR_WORKER_DATA
-/// perWorkerPrivateStateSize must be >= sizeof SWR_WORKER_DATA
-/////////////////////////////////////////////////////////////////////////
-struct SWR_WORKER_PRIVATE_STATE
-{
- typedef void(SWR_API* PFN_WORKER_DATA)(SWR_CONTEXT* pContext, HANDLE hWorkerPrivateData, uint32_t iWorkerNum);
-
- size_t perWorkerPrivateStateSize; ///< Amount of data to allocate per-worker
- PFN_WORKER_DATA pfnInitWorkerData; ///< Init function for worker data. If null
- ///< worker data will be initialized to 0.
- PFN_WORKER_DATA pfnFinishWorkerData; ///< Finish / destroy function for worker data.
- ///< Can be null.
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_CREATECONTEXT_INFO
-/////////////////////////////////////////////////////////////////////////
-struct SWR_CREATECONTEXT_INFO
-{
- // External functions (e.g. sampler) need per draw context state.
- // Use SwrGetPrivateContextState() to access private state.
- size_t privateStateSize;
-
- // Optional per-worker state, can be NULL for no worker-private data
- SWR_WORKER_PRIVATE_STATE* pWorkerPrivateState;
-
- // Callback functions
- PFN_LOAD_TILE pfnLoadTile;
- PFN_STORE_TILE pfnStoreTile;
- PFN_TRANSLATE_GFXPTR_FOR_READ pfnTranslateGfxptrForRead;
- PFN_TRANSLATE_GFXPTR_FOR_WRITE pfnTranslateGfxptrForWrite;
- PFN_MAKE_GFXPTR pfnMakeGfxPtr;
- PFN_CREATE_MEMORY_CONTEXT pfnCreateMemoryContext;
- PFN_DESTROY_MEMORY_CONTEXT pfnDestroyMemoryContext;
- PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
- PFN_UPDATE_STATS pfnUpdateStats;
- PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
- PFN_UPDATE_STREAMOUT pfnUpdateStreamOut;
-
-
- // Pointer to rdtsc buckets mgr returned to the caller.
- // Only populated when KNOB_ENABLE_RDTSC is set
- BucketManager* pBucketMgr;
-
- // Output: size required memory passed to for SwrSaveState / SwrRestoreState
- size_t contextSaveSize;
-
- // ArchRast event manager.
- HANDLE hArEventManager;
-
- // handle to external memory for worker data to create memory contexts
- HANDLE hExternalMemory;
-
- // Input (optional): Threading info that overrides any set KNOB values.
- SWR_THREADING_INFO* pThreadInfo;
-
- // Input (optional): Info for reserving API threads
- SWR_API_THREADING_INFO* pApiThreadInfo;
-
- // Input: if set to non-zero value, overrides KNOB value for maximum
- // number of draws in flight
- uint32_t MAX_DRAWS_IN_FLIGHT;
-
- std::string contextName;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Create SWR Context.
-/// @param pCreateInfo - pointer to creation info.
-SWR_FUNC(HANDLE, SwrCreateContext, SWR_CREATECONTEXT_INFO* pCreateInfo);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Destroys SWR Context.
-/// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrDestroyContext, HANDLE hContext);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Bind current thread to an API reserved HW thread
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param apiThreadId - index of reserved HW thread to bind to.
-SWR_FUNC(void, SwrBindApiThread, HANDLE hContext, uint32_t apiThreadId);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Saves API state associated with hContext
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pOutputStateBlock - Memory block to receive API state data
-/// @param memSize - Size of memory pointed to by pOutputStateBlock
-SWR_FUNC(void, SwrSaveState, HANDLE hContext, void* pOutputStateBlock, size_t memSize);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Restores API state to hContext previously saved with SwrSaveState
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pStateBlock - Memory block to read API state data from
-/// @param memSize - Size of memory pointed to by pStateBlock
-SWR_FUNC(void, SwrRestoreState, HANDLE hContext, const void* pStateBlock, size_t memSize);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Sync cmd. Executes the callback func when all rendering up to this sync
-/// has been completed
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pfnFunc - pointer to callback function,
-/// @param userData - user data to pass back
-SWR_FUNC(void,
- SwrSync,
- HANDLE hContext,
- PFN_CALLBACK_FUNC pfnFunc,
- uint64_t userData,
- uint64_t userData2,
- uint64_t userData3);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Stall cmd. Stalls the backend until all previous work has been completed.
-/// Frontend work can continue to make progress
-/// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrStallBE, HANDLE hContext);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Blocks until all rendering has been completed.
-/// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrWaitForIdle, HANDLE hContext);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Blocks until all FE rendering has been completed.
-/// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrWaitForIdleFE, HANDLE hContext);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set vertex buffer state.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param numBuffers - Number of vertex buffer state descriptors.
-/// @param pVertexBuffers - Array of vertex buffer state descriptors.
-SWR_FUNC(void,
- SwrSetVertexBuffers,
- HANDLE hContext,
- uint32_t numBuffers,
- const SWR_VERTEX_BUFFER_STATE* pVertexBuffers);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set index buffer
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pIndexBuffer - Index buffer.
-SWR_FUNC(void, SwrSetIndexBuffer, HANDLE hContext, const SWR_INDEX_BUFFER_STATE* pIndexBuffer);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set fetch shader pointer.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pfnFetchFunc - Pointer to shader.
-SWR_FUNC(void, SwrSetFetchFunc, HANDLE hContext, PFN_FETCH_FUNC pfnFetchFunc);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set streamout shader pointer.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pfnSoFunc - Pointer to shader.
-/// @param streamIndex - specifies stream
-SWR_FUNC(void, SwrSetSoFunc, HANDLE hContext, PFN_SO_FUNC pfnSoFunc, uint32_t streamIndex);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set streamout state
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pSoState - Pointer to streamout state.
-SWR_FUNC(void, SwrSetSoState, HANDLE hContext, SWR_STREAMOUT_STATE* pSoState);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set streamout buffer state
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pSoBuffer - Pointer to streamout buffer.
-/// @param slot - Slot to bind SO buffer to.
-SWR_FUNC(void, SwrSetSoBuffers, HANDLE hContext, SWR_STREAMOUT_BUFFER* pSoBuffer, uint32_t slot);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set vertex shader pointer.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pfnVertexFunc - Pointer to shader.
-SWR_FUNC(void, SwrSetVertexFunc, HANDLE hContext, PFN_VERTEX_FUNC pfnVertexFunc);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set frontend state.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pState - Pointer to state
-SWR_FUNC(void, SwrSetFrontendState, HANDLE hContext, SWR_FRONTEND_STATE* pState);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set geometry shader state.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pState - Pointer to state
-SWR_FUNC(void, SwrSetGsState, HANDLE hContext, SWR_GS_STATE* pState);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set geometry shader
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pState - Pointer to geometry shader function
-SWR_FUNC(void, SwrSetGsFunc, HANDLE hContext, PFN_GS_FUNC pfnGsFunc);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set compute shader
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pfnCsFunc - Pointer to compute shader function
-/// @param totalThreadsInGroup - product of thread group dimensions.
-/// @param totalSpillFillSize - size in bytes needed for spill/fill.
-/// @param scratchSpaceSizePerInstance - size of the scratch space needed per simd instance
-/// @param numInstances - number of simd instances that are run per execution of the shader
-SWR_FUNC(void,
- SwrSetCsFunc,
- HANDLE hContext,
- PFN_CS_FUNC pfnCsFunc,
- uint32_t totalThreadsInGroup,
- uint32_t totalSpillFillSize,
- uint32_t scratchSpaceSizePerInstance,
- uint32_t numInstances);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set tessellation state.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pState - Pointer to state
-SWR_FUNC(void, SwrSetTsState, HANDLE hContext, SWR_TS_STATE* pState);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set hull shader
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pfnFunc - Pointer to shader function
-SWR_FUNC(void, SwrSetHsFunc, HANDLE hContext, PFN_HS_FUNC pfnFunc);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set domain shader
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pfnFunc - Pointer to shader function
-SWR_FUNC(void, SwrSetDsFunc, HANDLE hContext, PFN_DS_FUNC pfnFunc);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set depth stencil state
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetDepthStencilState, HANDLE hContext, SWR_DEPTH_STENCIL_STATE* pState);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set backend state
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetBackendState, HANDLE hContext, SWR_BACKEND_STATE* pState);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set depth bounds state
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetDepthBoundsState, HANDLE hContext, SWR_DEPTH_BOUNDS_STATE* pState);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set pixel shader state
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetPixelShaderState, HANDLE hContext, SWR_PS_STATE* pState);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set blend state
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetBlendState, HANDLE hContext, SWR_BLEND_STATE* pState);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Set blend function
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param renderTarget - render target index
-/// @param pfnBlendFunc - function pointer
-SWR_FUNC(
- void, SwrSetBlendFunc, HANDLE hContext, uint32_t renderTarget, PFN_BLEND_JIT_FUNC pfnBlendFunc);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrDraw
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param topology - Specifies topology for draw.
-/// @param startVertex - Specifies start vertex in vertex buffer for draw.
-/// @param primCount - Number of vertices.
-SWR_FUNC(void,
- SwrDraw,
- HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t startVertex,
- uint32_t primCount);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrDrawInstanced
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param topology - Specifies topology for draw.
-/// @param numVertsPerInstance - How many vertices to read sequentially from vertex data.
-/// @param numInstances - How many instances to render.
-/// @param startVertex - Specifies start vertex for draw. (vertex data)
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer
-/// (instanced data)
-SWR_FUNC(void,
- SwrDrawInstanced,
- HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numVertsPerInstance,
- uint32_t numInstances,
- uint32_t startVertex,
- uint32_t startInstance);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief DrawIndexed
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param topology - Specifies topology for draw.
-/// @param numIndices - Number of indices to read sequentially from index buffer.
-/// @param indexOffset - Starting index into index buffer.
-/// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
-SWR_FUNC(void,
- SwrDrawIndexed,
- HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numIndices,
- uint32_t indexOffset,
- int32_t baseVertex);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrDrawIndexedInstanced
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param topology - Specifies topology for draw.
-/// @param numIndices - Number of indices to read sequentially from index buffer.
-/// @param numInstances - Number of instances to render.
-/// @param indexOffset - Starting index into index buffer.
-/// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer
-/// (instanced data)
-SWR_FUNC(void,
- SwrDrawIndexedInstanced,
- HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numIndices,
- uint32_t numInstances,
- uint32_t indexOffset,
- int32_t baseVertex,
- uint32_t startInstance);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrInvalidateTiles
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to
-/// invalidate.
-/// @param invalidateRect - The pixel-coordinate rectangle to invalidate. This will be expanded to
-/// be hottile size-aligned.
-SWR_FUNC(void,
- SwrInvalidateTiles,
- HANDLE hContext,
- uint32_t attachmentMask,
- const SWR_RECT& invalidateRect);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrDiscardRect
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
-/// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be
-/// discarded.
-SWR_FUNC(void, SwrDiscardRect, HANDLE hContext, uint32_t attachmentMask, const SWR_RECT& rect);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrDispatch
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param threadGroupCountX - Number of thread groups dispatched in X direction
-/// @param threadGroupCountY - Number of thread groups dispatched in Y direction
-/// @param threadGroupCountZ - Number of thread groups dispatched in Z direction
-SWR_FUNC(void,
- SwrDispatch,
- HANDLE hContext,
- uint32_t threadGroupCountX,
- uint32_t threadGroupCountY,
- uint32_t threadGroupCountZ);
-
-/// @note this enum needs to be kept in sync with HOTTILE_STATE!
-enum SWR_TILE_STATE
-{
- SWR_TILE_INVALID = 0, // tile is in uninitialized state and should be loaded with surface contents
- // before rendering
- SWR_TILE_DIRTY = 2, // tile contains newer data than surface it represents
- SWR_TILE_RESOLVED = 3, // is in sync with surface it represents
-};
-
-/// @todo Add a good description for what attachments are and when and why you would use the
-/// different SWR_TILE_STATEs.
-SWR_FUNC(void,
- SwrStoreTiles,
- HANDLE hContext,
- uint32_t attachmentMask,
- SWR_TILE_STATE postStoreTileState,
- const SWR_RECT& storeRect);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param attachmentMask - combination of SWR_ATTACHMENT_*_BIT attachments to clear
-/// @param renderTargetArrayIndex - the RT array index to clear
-/// @param clearColor - color use for clearing render targets
-/// @param z - depth value use for clearing depth buffer
-/// @param stencil - stencil value used for clearing stencil buffer
-/// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
-SWR_FUNC(void,
- SwrClearRenderTarget,
- HANDLE hContext,
- uint32_t attachmentMask,
- uint32_t renderTargetArrayIndex,
- const float clearColor[4],
- float z,
- uint8_t stencil,
- const SWR_RECT& clearRect);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrSetRastState
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param pRastState - New SWR_RASTSTATE used for SwrDraw* commands
-SWR_FUNC(void, SwrSetRastState, HANDLE hContext, const SWR_RASTSTATE* pRastState);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrSetViewports
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param numViewports - number of viewports passed in
-/// @param pViewports - Specifies extents of viewport.
-/// @param pMatrices - If not specified then SWR computes a default one.
-SWR_FUNC(void,
- SwrSetViewports,
- HANDLE hContext,
- uint32_t numViewports,
- const SWR_VIEWPORT* pViewports,
- const SWR_VIEWPORT_MATRICES* pMatrices);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SwrSetScissorRects
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param numScissors - number of scissors passed in
-/// @param pScissors - array of scissors
-SWR_FUNC(
- void, SwrSetScissorRects, HANDLE hContext, uint32_t numScissors, const SWR_RECT* pScissors);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Returns a pointer to the private context state for the current
-/// draw operation. This is used for external componets such as the
-/// sampler.
-///
-/// @note Client needs to resend private state prior to each draw call.
-/// Also, SWR is responsible for the private state memory.
-/// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void*, SwrGetPrivateContextState, HANDLE hContext);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Clients can use this to allocate memory for draw/dispatch
-/// operations. The memory will automatically be freed once operation
-/// has completed. Client can use this to allocate binding tables,
-/// etc. needed for shader execution.
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param size - Size of allocation
-/// @param align - Alignment needed for allocation.
-SWR_FUNC(void*, SwrAllocDrawContextMemory, HANDLE hContext, uint32_t size, uint32_t align);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Enables stats counting
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param enable - If true then counts are incremented.
-SWR_FUNC(void, SwrEnableStatsFE, HANDLE hContext, bool enable);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Enables stats counting
-/// @param hContext - Handle passed back from SwrCreateContext
-/// @param enable - If true then counts are incremented.
-SWR_FUNC(void, SwrEnableStatsBE, HANDLE hContext, bool enable);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Mark end of frame - used for performance profiling
-/// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrEndFrame, HANDLE hContext);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Initialize swr backend and memory internal tables
-SWR_FUNC(void, SwrInit);
-
-
-struct SWR_INTERFACE
-{
- PFNSwrCreateContext pfnSwrCreateContext;
- PFNSwrDestroyContext pfnSwrDestroyContext;
- PFNSwrBindApiThread pfnSwrBindApiThread;
- PFNSwrSaveState pfnSwrSaveState;
- PFNSwrRestoreState pfnSwrRestoreState;
- PFNSwrSync pfnSwrSync;
- PFNSwrStallBE pfnSwrStallBE;
- PFNSwrWaitForIdle pfnSwrWaitForIdle;
- PFNSwrWaitForIdleFE pfnSwrWaitForIdleFE;
- PFNSwrSetVertexBuffers pfnSwrSetVertexBuffers;
- PFNSwrSetIndexBuffer pfnSwrSetIndexBuffer;
- PFNSwrSetFetchFunc pfnSwrSetFetchFunc;
- PFNSwrSetSoFunc pfnSwrSetSoFunc;
- PFNSwrSetSoState pfnSwrSetSoState;
- PFNSwrSetSoBuffers pfnSwrSetSoBuffers;
- PFNSwrSetVertexFunc pfnSwrSetVertexFunc;
- PFNSwrSetFrontendState pfnSwrSetFrontendState;
- PFNSwrSetGsState pfnSwrSetGsState;
- PFNSwrSetGsFunc pfnSwrSetGsFunc;
- PFNSwrSetCsFunc pfnSwrSetCsFunc;
- PFNSwrSetTsState pfnSwrSetTsState;
- PFNSwrSetHsFunc pfnSwrSetHsFunc;
- PFNSwrSetDsFunc pfnSwrSetDsFunc;
- PFNSwrSetDepthStencilState pfnSwrSetDepthStencilState;
- PFNSwrSetBackendState pfnSwrSetBackendState;
- PFNSwrSetDepthBoundsState pfnSwrSetDepthBoundsState;
- PFNSwrSetPixelShaderState pfnSwrSetPixelShaderState;
- PFNSwrSetBlendState pfnSwrSetBlendState;
- PFNSwrSetBlendFunc pfnSwrSetBlendFunc;
- PFNSwrDraw pfnSwrDraw;
- PFNSwrDrawInstanced pfnSwrDrawInstanced;
- PFNSwrDrawIndexed pfnSwrDrawIndexed;
- PFNSwrDrawIndexedInstanced pfnSwrDrawIndexedInstanced;
- PFNSwrInvalidateTiles pfnSwrInvalidateTiles;
- PFNSwrDiscardRect pfnSwrDiscardRect;
- PFNSwrDispatch pfnSwrDispatch;
- PFNSwrStoreTiles pfnSwrStoreTiles;
- PFNSwrClearRenderTarget pfnSwrClearRenderTarget;
- PFNSwrSetRastState pfnSwrSetRastState;
- PFNSwrSetViewports pfnSwrSetViewports;
- PFNSwrSetScissorRects pfnSwrSetScissorRects;
- PFNSwrGetPrivateContextState pfnSwrGetPrivateContextState;
- PFNSwrAllocDrawContextMemory pfnSwrAllocDrawContextMemory;
- PFNSwrEnableStatsFE pfnSwrEnableStatsFE;
- PFNSwrEnableStatsBE pfnSwrEnableStatsBE;
- PFNSwrEndFrame pfnSwrEndFrame;
- PFNSwrInit pfnSwrInit;
-};
-
-extern "C" {
-typedef void(SWR_API* PFNSwrGetInterface)(SWR_INTERFACE& out_funcs);
-SWR_VISIBLE void SWR_API SwrGetInterface(SWR_INTERFACE& out_funcs);
-}
-
-#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h b/src/gallium/drivers/swr/rasterizer/core/arena.h
deleted file mode 100644
index 831617c213f..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/arena.h
+++ /dev/null
@@ -1,490 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file arena.h
- *
- * @brief Arena memory manager
- * The arena is convenient and fast for managing allocations for any of
- * our allocations that are associated with operations and can all be freed
- * once when their operation has completed. Allocations are cheap since
- * most of the time its simply an increment of an offset. Also, no need to
- * free individual allocations. All of the arena memory can be freed at once.
- *
- ******************************************************************************/
-#pragma once
-
-#include <mutex>
-#include <algorithm>
-#include <atomic>
-#include "core/utils.h"
-
-static const size_t ARENA_BLOCK_ALIGN = 64;
-
-struct ArenaBlock
-{
- size_t blockSize = 0;
- ArenaBlock* pNext = nullptr;
-};
-static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN, "Increase BLOCK_ALIGN size");
-
-class DefaultAllocator
-{
-public:
- ArenaBlock* AllocateAligned(size_t size, size_t align)
- {
- SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
-
- ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
- p->blockSize = size;
- return p;
- }
-
- void Free(ArenaBlock* pMem)
- {
- if (pMem)
- {
- SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd));
- AlignedFree(pMem);
- }
- }
-};
-
-// Caching Allocator for Arena
-template <uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
-struct CachingAllocatorT : DefaultAllocator
-{
- ArenaBlock* AllocateAligned(size_t size, size_t align)
- {
- SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
- SWR_ASSUME_ASSERT(size <= uint32_t(-1));
-
- uint32_t bucket = GetBucketId(size);
-
- {
- // search cached blocks
- std::lock_guard<std::mutex> l(m_mutex);
- ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
- ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);
-
- if (pBlock)
- {
- m_cachedSize -= pBlock->blockSize;
- if (pBlock == m_pLastCachedBlocks[bucket])
- {
- m_pLastCachedBlocks[bucket] = pPrevBlock;
- }
- }
- else
- {
- pPrevBlock = &m_oldCachedBlocks[bucket];
- pBlock = SearchBlocks(pPrevBlock, size, align);
-
- if (pBlock)
- {
- m_oldCachedSize -= pBlock->blockSize;
- if (pBlock == m_pOldLastCachedBlocks[bucket])
- {
- m_pOldLastCachedBlocks[bucket] = pPrevBlock;
- }
- }
- }
-
- if (pBlock)
- {
- assert(pPrevBlock && pPrevBlock->pNext == pBlock);
- pPrevBlock->pNext = pBlock->pNext;
- pBlock->pNext = nullptr;
-
- return pBlock;
- }
-
- m_totalAllocated += size;
-
-#if 0
- {
- static uint32_t count = 0;
- char buf[128];
- sprintf_s(buf, "Arena Alloc %d 0x%llx bytes - 0x%llx total\n", ++count, uint64_t(size), uint64_t(m_totalAllocated));
- OutputDebugStringA(buf);
- }
-#endif
- }
-
- if (bucket && bucket < (CACHE_NUM_BUCKETS - 1))
- {
- // Make all blocks in this bucket the same size
- size = size_t(1) << (bucket + 1 + CACHE_START_BUCKET_BIT);
- }
-
- return this->DefaultAllocator::AllocateAligned(size, align);
- }
-
- void Free(ArenaBlock* pMem)
- {
- if (pMem)
- {
- std::unique_lock<std::mutex> l(m_mutex);
- InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
- }
- }
-
- void FreeOldBlocks()
- {
- if (!m_cachedSize)
- {
- return;
- }
- std::lock_guard<std::mutex> l(m_mutex);
-
- bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);
-
- for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
- {
- if (doFree)
- {
- ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
- while (pBlock)
- {
- ArenaBlock* pNext = pBlock->pNext;
- m_oldCachedSize -= pBlock->blockSize;
- m_totalAllocated -= pBlock->blockSize;
- this->DefaultAllocator::Free(pBlock);
- pBlock = pNext;
- }
- m_oldCachedBlocks[i].pNext = nullptr;
- m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
- }
-
- if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
- {
- if (i && i < (CACHE_NUM_BUCKETS - 1))
- {
- // We know that all blocks are the same size.
- // Just move the list over.
- m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
- m_oldCachedBlocks[i].pNext = m_cachedBlocks[i].pNext;
- m_cachedBlocks[i].pNext = nullptr;
- if (m_pOldLastCachedBlocks[i]->pNext)
- {
- m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
- }
- m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
- }
- else
- {
- // The end buckets can have variable sized lists.
- // Insert each block based on size
- ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
- while (pBlock)
- {
- ArenaBlock* pNext = pBlock->pNext;
- pBlock->pNext = nullptr;
- m_cachedSize -= pBlock->blockSize;
- InsertCachedBlock<true>(i, pBlock);
- pBlock = pNext;
- }
-
- m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
- m_cachedBlocks[i].pNext = nullptr;
- }
- }
- }
-
- m_oldCachedSize += m_cachedSize;
- m_cachedSize = 0;
- }
-
- CachingAllocatorT()
- {
- for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
- {
- m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
- m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
- }
- }
-
- ~CachingAllocatorT()
- {
- // Free all cached blocks
- for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
- {
- ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
- while (pBlock)
- {
- ArenaBlock* pNext = pBlock->pNext;
- this->DefaultAllocator::Free(pBlock);
- pBlock = pNext;
- }
- pBlock = m_oldCachedBlocks[i].pNext;
- while (pBlock)
- {
- ArenaBlock* pNext = pBlock->pNext;
- this->DefaultAllocator::Free(pBlock);
- pBlock = pNext;
- }
- }
- }
-
-private:
- static uint32_t GetBucketId(size_t blockSize)
- {
- uint32_t bucketId = 0;
-
-#if defined(BitScanReverseSizeT)
- BitScanReverseSizeT((unsigned long*)&bucketId, (blockSize - 1) >> CACHE_START_BUCKET_BIT);
- bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
-#endif
-
- return bucketId;
- }
-
- template <bool OldBlockT = false>
- void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
- {
- SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);
-
- ArenaBlock* pPrevBlock =
- OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
- ArenaBlock* pBlock = pPrevBlock->pNext;
-
- while (pBlock)
- {
- if (pNewBlock->blockSize >= pBlock->blockSize)
- {
- // Insert here
- break;
- }
- pPrevBlock = pBlock;
- pBlock = pBlock->pNext;
- }
-
- // Insert into list
- SWR_ASSUME_ASSERT(pPrevBlock);
- pPrevBlock->pNext = pNewBlock;
- pNewBlock->pNext = pBlock;
-
- if (OldBlockT)
- {
- if (m_pOldLastCachedBlocks[bucketId] == pPrevBlock)
- {
- m_pOldLastCachedBlocks[bucketId] = pNewBlock;
- }
-
- m_oldCachedSize += pNewBlock->blockSize;
- }
- else
- {
- if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
- {
- m_pLastCachedBlocks[bucketId] = pNewBlock;
- }
-
- m_cachedSize += pNewBlock->blockSize;
- }
- }
-
- static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
- {
- ArenaBlock* pBlock = pPrevBlock->pNext;
- ArenaBlock* pPotentialBlock = nullptr;
- ArenaBlock* pPotentialPrev = nullptr;
-
- while (pBlock)
- {
- if (pBlock->blockSize >= blockSize)
- {
- if (pBlock == AlignUp(pBlock, align))
- {
- if (pBlock->blockSize == blockSize)
- {
- // Won't find a better match
- break;
- }
-
- // We could use this as it is larger than we wanted, but
- // continue to search for a better match
- pPotentialBlock = pBlock;
- pPotentialPrev = pPrevBlock;
- }
- }
- else
- {
- // Blocks are sorted by size (biggest first)
- // So, if we get here, there are no blocks
- // large enough, fall through to allocation.
- pBlock = nullptr;
- break;
- }
-
- pPrevBlock = pBlock;
- pBlock = pBlock->pNext;
- }
-
- if (!pBlock)
- {
- // Couldn't find an exact match, use next biggest size
- pBlock = pPotentialBlock;
- pPrevBlock = pPotentialPrev;
- }
-
- return pBlock;
- }
-
- // buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
- static const uint32_t CACHE_NUM_BUCKETS = NumBucketsT;
- static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
- static const size_t MAX_UNUSED_SIZE = sizeof(MEGABYTE);
-
- ArenaBlock m_cachedBlocks[CACHE_NUM_BUCKETS];
- ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
- ArenaBlock m_oldCachedBlocks[CACHE_NUM_BUCKETS];
- ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
- std::mutex m_mutex;
-
- size_t m_totalAllocated = 0;
-
- size_t m_cachedSize = 0;
- size_t m_oldCachedSize = 0;
-};
-typedef CachingAllocatorT<> CachingAllocator;
-
-template <typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
-class TArena
-{
-public:
- TArena(T& in_allocator) : m_allocator(in_allocator) {}
- TArena() : m_allocator(m_defAllocator) {}
- ~TArena() { Reset(true); }
-
- void* AllocAligned(size_t size, size_t align)
- {
- if (0 == size)
- {
- return nullptr;
- }
-
- SWR_ASSERT(align <= ARENA_BLOCK_ALIGN);
-
- if (m_pCurBlock)
- {
- ArenaBlock* pCurBlock = m_pCurBlock;
- size_t offset = AlignUp(m_offset, align);
-
- if ((offset + size) <= pCurBlock->blockSize)
- {
- void* pMem = PtrAdd(pCurBlock, offset);
- m_offset = offset + size;
- return pMem;
- }
-
- // Not enough memory in this block, fall through to allocate
- // a new block
- }
-
- static const size_t ArenaBlockSize = BlockSizeT;
- size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);
-
- // Add in one BLOCK_ALIGN unit to store ArenaBlock in.
- blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);
-
- ArenaBlock* pNewBlock = m_allocator.AllocateAligned(
- blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
- SWR_ASSERT(pNewBlock != nullptr);
-
- if (pNewBlock != nullptr)
- {
- m_offset = ARENA_BLOCK_ALIGN;
- pNewBlock->pNext = m_pCurBlock;
-
- m_pCurBlock = pNewBlock;
- }
-
- return AllocAligned(size, align);
- }
-
- void* Alloc(size_t size) { return AllocAligned(size, 1); }
-
- void* AllocAlignedSync(size_t size, size_t align)
- {
- void* pAlloc = nullptr;
-
- m_mutex.lock();
- pAlloc = AllocAligned(size, align);
- m_mutex.unlock();
-
- return pAlloc;
- }
-
- void* AllocSync(size_t size)
- {
- void* pAlloc = nullptr;
-
- m_mutex.lock();
- pAlloc = Alloc(size);
- m_mutex.unlock();
-
- return pAlloc;
- }
-
- void Reset(bool removeAll = false)
- {
- m_offset = ARENA_BLOCK_ALIGN;
-
- if (m_pCurBlock)
- {
- ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
- m_pCurBlock->pNext = nullptr;
- while (pUsedBlocks)
- {
- ArenaBlock* pBlock = pUsedBlocks;
- pUsedBlocks = pBlock->pNext;
-
- m_allocator.Free(pBlock);
- }
-
- if (removeAll)
- {
- m_allocator.Free(m_pCurBlock);
- m_pCurBlock = nullptr;
- }
- }
- }
-
- bool IsEmpty()
- {
- return (m_pCurBlock == nullptr) ||
- (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
- }
-
-private:
- ArenaBlock* m_pCurBlock = nullptr;
- size_t m_offset = ARENA_BLOCK_ALIGN;
-
- /// @note Mutex is only used by sync allocation functions.
- std::mutex m_mutex;
-
- DefaultAllocator m_defAllocator;
- T& m_allocator;
-};
-
-using StdArena = TArena<DefaultAllocator>;
-using CachingArena = TArena<CachingAllocator>;
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
deleted file mode 100644
index bb9d6f7dc52..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ /dev/null
@@ -1,420 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file backend.cpp
- *
- * @brief Backend handles rasterization, pixel shading and output merger
- * operations.
- *
- ******************************************************************************/
-
-#include <smmintrin.h>
-
-#include "backend.h"
-#include "backend_impl.h"
-#include "tilemgr.h"
-#include "memory/tilingtraits.h"
-#include "core/multisample.h"
-#include "backends/gen_BackendPixelRate.hpp"
-
-#include <algorithm>
-
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Process compute work.
-/// @param pDC - pointer to draw context (dispatch).
-/// @param workerId - The unique worker ID that is assigned to this thread.
-/// @param threadGroupId - the linear index for the thread group within the dispatch.
-void ProcessComputeBE(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t threadGroupId,
- void*& pSpillFillBuffer,
- void*& pScratchSpace)
-{
- SWR_CONTEXT* pContext = pDC->pContext;
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEDispatch, pDC->drawId);
-
- const COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pDispatch->GetTasksData();
- SWR_ASSERT(pTaskData != nullptr);
-
- // Ensure spill fill memory has been allocated.
- size_t spillFillSize = pDC->pState->state.totalSpillFillSize;
- if (spillFillSize && pSpillFillBuffer == nullptr)
- {
- pSpillFillBuffer = pDC->pArena->AllocAlignedSync(spillFillSize, KNOB_SIMD16_BYTES);
- }
-
- size_t scratchSpaceSize =
- pDC->pState->state.scratchSpaceSizePerWarp * pDC->pState->state.scratchSpaceNumWarps;
- if (scratchSpaceSize && pScratchSpace == nullptr)
- {
- pScratchSpace = pDC->pArena->AllocAlignedSync(scratchSpaceSize, KNOB_SIMD16_BYTES);
- }
-
- const API_STATE& state = GetApiState(pDC);
-
- SWR_CS_CONTEXT csContext{0};
- csContext.tileCounter = threadGroupId;
- csContext.dispatchDims[0] = pTaskData->threadGroupCountX;
- csContext.dispatchDims[1] = pTaskData->threadGroupCountY;
- csContext.dispatchDims[2] = pTaskData->threadGroupCountZ;
- csContext.pTGSM = pContext->ppScratch[workerId];
- csContext.pSpillFillBuffer = (uint8_t*)pSpillFillBuffer;
- csContext.pScratchSpace = (uint8_t*)pScratchSpace;
- csContext.scratchSpacePerWarp = pDC->pState->state.scratchSpaceSizePerWarp;
-
- state.pfnCsFunc(GetPrivateState(pDC),
- pContext->threadPool.pThreadData[workerId].pWorkerPrivateData,
- &csContext);
-
- UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
- AR_EVENT(CSStats((HANDLE)&csContext.stats));
-
- RDTSC_END(pDC->pContext->pBucketMgr, BEDispatch, 1);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Process shutdown.
-/// @param pDC - pointer to draw context (dispatch).
-/// @param workerId - The unique worker ID that is assigned to this thread.
-/// @param threadGroupId - the linear index for the thread group within the dispatch.
-void ProcessShutdownBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData)
-{
- // Dummy function
-}
-
-void ProcessSyncBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData)
-{
- uint32_t x, y;
- MacroTileMgr::getTileIndices(macroTile, x, y);
- SWR_ASSERT(x == 0 && y == 0);
-}
-
-void ProcessStoreTileBE(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t macroTile,
- STORE_TILES_DESC* pDesc,
- SWR_RENDERTARGET_ATTACHMENT attachment)
-{
- SWR_CONTEXT* pContext = pDC->pContext;
- HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEStoreTiles, pDC->drawId);
-
- SWR_FORMAT srcFormat;
- switch (attachment)
- {
- case SWR_ATTACHMENT_COLOR0:
- case SWR_ATTACHMENT_COLOR1:
- case SWR_ATTACHMENT_COLOR2:
- case SWR_ATTACHMENT_COLOR3:
- case SWR_ATTACHMENT_COLOR4:
- case SWR_ATTACHMENT_COLOR5:
- case SWR_ATTACHMENT_COLOR6:
- case SWR_ATTACHMENT_COLOR7:
- srcFormat = KNOB_COLOR_HOT_TILE_FORMAT;
- break;
- case SWR_ATTACHMENT_DEPTH:
- srcFormat = KNOB_DEPTH_HOT_TILE_FORMAT;
- break;
- case SWR_ATTACHMENT_STENCIL:
- srcFormat = KNOB_STENCIL_HOT_TILE_FORMAT;
- break;
- default:
- SWR_INVALID("Unknown attachment: %d", attachment);
- srcFormat = KNOB_COLOR_HOT_TILE_FORMAT;
- break;
- }
-
- uint32_t x, y;
- MacroTileMgr::getTileIndices(macroTile, x, y);
-
- // Only need to store the hottile if it's been rendered to...
- HOTTILE* pHotTile =
- pContext->pHotTileMgr->GetHotTileNoLoad(pContext, pDC, macroTile, attachment, false);
- if (pHotTile)
- {
- // clear if clear is pending (i.e., not rendered to), then mark as dirty for store.
- if (pHotTile->state == HOTTILE_CLEAR)
- {
- PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[srcFormat];
- SWR_ASSERT(pfnClearTiles != nullptr);
-
- pfnClearTiles(pDC,
- hWorkerPrivateData,
- attachment,
- macroTile,
- pHotTile->renderTargetArrayIndex,
- pHotTile->clearData,
- pDesc->rect);
- }
-
- if (pHotTile->state == HOTTILE_DIRTY ||
- pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY)
- {
- int32_t destX = KNOB_MACROTILE_X_DIM * x;
- int32_t destY = KNOB_MACROTILE_Y_DIM * y;
-
- pContext->pfnStoreTile(pDC,
- hWorkerPrivateData,
- srcFormat,
- attachment,
- destX,
- destY,
- pHotTile->renderTargetArrayIndex,
- pHotTile->pBuffer);
- }
-
- if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_RESOLVED)
- {
- if (!(pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY &&
- pHotTile->state == HOTTILE_RESOLVED))
- {
- pHotTile->state = (HOTTILE_STATE)pDesc->postStoreTileState;
- }
- }
- }
- RDTSC_END(pDC->pContext->pBucketMgr, BEStoreTiles, 1);
-}
-
-void ProcessStoreTilesBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
-{
- STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pData;
-
- unsigned long rt = 0;
- uint32_t mask = pDesc->attachmentMask;
- while (_BitScanForward(&rt, mask))
- {
- mask &= ~(1 << rt);
- ProcessStoreTileBE(pDC, workerId, macroTile, pDesc, (SWR_RENDERTARGET_ATTACHMENT)rt);
- }
-}
-
-void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t macroTile,
- void* pData)
-{
- DISCARD_INVALIDATE_TILES_DESC* pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pData;
- SWR_CONTEXT* pContext = pDC->pContext;
-
- const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
-
- for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; ++i)
- {
- if (pDesc->attachmentMask & (1 << i))
- {
- HOTTILE* pHotTile =
- pContext->pHotTileMgr->GetHotTileNoLoad(pContext,
- pDC,
- macroTile,
- (SWR_RENDERTARGET_ATTACHMENT)i,
- pDesc->createNewTiles,
- numSamples);
- if (pHotTile)
- {
- HOTTILE_STATE newState = (HOTTILE_STATE)pDesc->newTileState;;
- if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_CLEAR)
- {
- if (newState == HOTTILE_INVALID)
- {
- // This is OK for APIs that explicitly allow discards
- // (for e.g. depth / stencil data)
- //SWR_INVALID("Discarding valid data!");
- }
- }
- pHotTile->state = newState;
- }
- }
- }
-}
-
-template <uint32_t sampleCountT>
-void BackendNullPS(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t x,
- uint32_t y,
- SWR_TRIANGLE_DESC& work,
- RenderOutputBuffers& renderBuffers)
-{
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BENullBackend, pDC->drawId);
- ///@todo: handle center multisample pattern
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId);
-
- const API_STATE& state = GetApiState(pDC);
-
- BarycentricCoeffs coeffs;
- SetupBarycentricCoeffs(&coeffs, work);
-
- uint8_t *pDepthBuffer, *pStencilBuffer;
- SetupRenderBuffers(NULL, &pDepthBuffer, &pStencilBuffer, 0, renderBuffers);
-
- SWR_PS_CONTEXT psContext;
- // skip SetupPixelShaderContext(&psContext, ...); // not needed here
-
- RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0);
-
- simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
-
- const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
- const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
- for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
- {
- simdscalar vXSamplePosUL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
-
- const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
-
- for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
- {
- // iterate over active samples
- unsigned long sample = 0;
- uint32_t sampleMask = state.blendState.sampleMask;
- while (_BitScanForward(&sample, sampleMask))
- {
- sampleMask &= ~(1 << sample);
-
- simdmask coverageMask = work.coverageMask[sample] & MASK;
-
- if (coverageMask)
- {
- // offset depth/stencil buffers current sample
- uint8_t* pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
- uint8_t* pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
-
- if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
- {
- static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT,
- "Unsupported depth hot tile format");
-
- const simdscalar z =
- _simd_load_ps(reinterpret_cast<const float*>(pDepthSample));
-
- const float minz = state.depthBoundsState.depthBoundsTestMinValue;
- const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
-
- coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
- }
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
-
- // calculate per sample positions
- psContext.vX.sample = _simd_add_ps(vXSamplePosUL, samplePos.vX(sample));
- psContext.vY.sample = _simd_add_ps(vYSamplePosUL, samplePos.vY(sample));
-
- CalcSampleBarycentrics(coeffs, psContext);
-
- // interpolate and quantize z
- psContext.vZ = vplaneps(coeffs.vZa,
- coeffs.vZb,
- coeffs.vZc,
- psContext.vI.sample,
- psContext.vJ.sample);
- psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
-
- RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
-
- // interpolate user clip distance if available
- if (state.backendState.clipDistanceMask)
- {
- coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask,
- work.pUserClipBuffer,
- psContext.vI.sample,
- psContext.vJ.sample);
- }
-
- simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
- simdscalar stencilPassMask = vCoverageMask;
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEarlyDepthTest, pDC->drawId);
- simdscalar depthPassMask = DepthStencilTest(&state,
- work.triFlags.frontFacing,
- work.triFlags.viewportIndex,
- psContext.vZ,
- pDepthSample,
- vCoverageMask,
- pStencilSample,
- &stencilPassMask);
- AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask),
- _simd_movemask_ps(stencilPassMask),
- _simd_movemask_ps(vCoverageMask)));
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
- &state.depthStencilState,
- work.triFlags.frontFacing,
- psContext.vZ,
- pDepthSample,
- depthPassMask,
- vCoverageMask,
- pStencilSample,
- stencilPassMask);
- RDTSC_END(pDC->pContext->pBucketMgr, BEEarlyDepthTest, 0);
-
- uint32_t statMask = _simd_movemask_ps(depthPassMask);
- uint32_t statCount = _mm_popcnt_u32(statMask);
- UPDATE_STAT_BE(DepthPassCount, statCount);
- }
-
- Endtile:
- ATTR_UNUSED;
- work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
- }
-
- pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
- pStencilBuffer +=
- (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
-
- vXSamplePosUL = _simd_add_ps(vXSamplePosUL, dx);
- }
-
- vYSamplePosUL = _simd_add_ps(vYSamplePosUL, dy);
- }
-
- RDTSC_END(pDC->pContext->pBucketMgr, BENullBackend, 0);
-}
-
-PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS] = {};
-PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT];
-PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT][2] // centroid
- [2] // canEarlyZ
- = {};
-PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT][2] // isCenterPattern
- [SWR_INPUT_COVERAGE_COUNT][2] // centroid
- [2] // forcedSampleCount
- [2] // canEarlyZ
- = {};
-PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT]
- [2] // centroid
- [2] // canEarlyZ
- = {};
-
-void InitBackendFuncTables()
-{
- InitBackendPixelRate();
- InitBackendSingleFuncTable(gBackendSingleSample);
- InitBackendSampleFuncTable(gBackendSampleRateTable);
-
- gBackendNullPs[SWR_MULTISAMPLE_1X] = &BackendNullPS<SWR_MULTISAMPLE_1X>;
- gBackendNullPs[SWR_MULTISAMPLE_2X] = &BackendNullPS<SWR_MULTISAMPLE_2X>;
- gBackendNullPs[SWR_MULTISAMPLE_4X] = &BackendNullPS<SWR_MULTISAMPLE_4X>;
- gBackendNullPs[SWR_MULTISAMPLE_8X] = &BackendNullPS<SWR_MULTISAMPLE_8X>;
- gBackendNullPs[SWR_MULTISAMPLE_16X] = &BackendNullPS<SWR_MULTISAMPLE_16X>;
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h
deleted file mode 100644
index c9eb6c259e3..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/backend.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file backend.h
- *
- * @brief Backend handles rasterization, pixel shading and output merger
- * operations.
- *
- ******************************************************************************/
-#pragma once
-
-#include "common/os.h"
-#include "core/context.h"
-#include "core/multisample.h"
-#include "depthstencil.h"
-#include "rdtsc_core.h"
-
-void ProcessComputeBE(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t threadGroupId,
- void*& pSpillFillBuffer,
- void*& pScratchSpace);
-void ProcessSyncBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData);
-void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData);
-void ProcessStoreTilesBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData);
-void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t macroTile,
- void* pData);
-void ProcessShutdownBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData);
-
-typedef void (*PFN_CLEAR_TILES)(DRAW_CONTEXT*,
- HANDLE hWorkerData,
- SWR_RENDERTARGET_ATTACHMENT rt,
- uint32_t,
- uint32_t,
- uint32_t[4],
- const SWR_RECT& rect);
-
-extern PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS];
-extern PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT];
-extern PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT][2] // centroid
- [2]; // canEarlyZ
-extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT][2] // isCenterPattern
- [SWR_INPUT_COVERAGE_COUNT][2] // centroid
- [2] // forcedSampleCount
- [2] // canEarlyZ
- ;
-extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
- [SWR_INPUT_COVERAGE_COUNT][2] // centroid
- [2]; // canEarlyZ
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp
deleted file mode 100644
index e772306faec..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp
+++ /dev/null
@@ -1,308 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file backend.cpp
- *
- * @brief Backend handles rasterization, pixel shading and output merger
- * operations.
- *
- ******************************************************************************/
-
-#include <smmintrin.h>
-
-#include "backend.h"
-#include "backend_impl.h"
-#include "tilemgr.h"
-#include "memory/tilingtraits.h"
-#include "core/multisample.h"
-
-#include <algorithm>
-
-template <SWR_FORMAT format>
-void ClearRasterTile(uint8_t* pTileBuffer, simd16vector& value)
-{
- auto lambda = [&](int32_t comp)
- {
- FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
-
- pTileBuffer += (KNOB_SIMD16_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
- };
-
- const uint32_t numIter =
- (KNOB_TILE_Y_DIM / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM);
-
- for (uint32_t i = 0; i < numIter; ++i)
- {
- UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda);
- }
-}
-
-template <SWR_FORMAT format>
-INLINE void ClearMacroTile(DRAW_CONTEXT* pDC,
- HANDLE hWorkerPrivateData,
- SWR_RENDERTARGET_ATTACHMENT rt,
- uint32_t macroTile,
- uint32_t renderTargetArrayIndex,
- uint32_t clear[4],
- const SWR_RECT& rect)
-{
- // convert clear color to hottile format
- // clear color is in RGBA float/uint32
-
- simd16vector vClear;
- for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp)
- {
- simd16scalar vComp = _simd16_load1_ps((const float*)&clear[comp]);
-
- if (FormatTraits<format>::isNormalized(comp))
- {
- vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<format>::fromFloat(comp)));
- vComp = _simd16_castsi_ps(_simd16_cvtps_epi32(vComp));
- }
- vComp = FormatTraits<format>::pack(comp, vComp);
-
- vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
- }
-
- uint32_t tileX, tileY;
- MacroTileMgr::getTileIndices(macroTile, tileX, tileY);
-
- // Init to full macrotile
- SWR_RECT clearTile = {
- KNOB_MACROTILE_X_DIM * int32_t(tileX),
- KNOB_MACROTILE_Y_DIM * int32_t(tileY),
- KNOB_MACROTILE_X_DIM * int32_t(tileX + 1),
- KNOB_MACROTILE_Y_DIM * int32_t(tileY + 1),
- };
-
- // intersect with clear rect
- clearTile &= rect;
-
- // translate to local hottile origin
- clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM,
- -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
-
- // Make maximums inclusive (needed for convert to raster tiles)
- clearTile.xmax -= 1;
- clearTile.ymax -= 1;
-
- // convert to raster tiles
- clearTile.ymin >>= (KNOB_TILE_Y_DIM_SHIFT);
- clearTile.ymax >>= (KNOB_TILE_Y_DIM_SHIFT);
- clearTile.xmin >>= (KNOB_TILE_X_DIM_SHIFT);
- clearTile.xmax >>= (KNOB_TILE_X_DIM_SHIFT);
-
- const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
- // compute steps between raster tile samples / raster tiles / macro tile rows
- const uint32_t rasterTileSampleStep =
- KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
- const uint32_t rasterTileStep =
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
- const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep;
- const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
-
- HOTTILE* pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext,
- pDC,
- hWorkerPrivateData,
- macroTile,
- rt,
- true,
- numSamples,
- renderTargetArrayIndex);
- uint32_t rasterTileStartOffset =
- (ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp>>(
- pitch, clearTile.xmin, clearTile.ymin)) *
- numSamples;
- uint8_t* pRasterTileRow =
- pHotTile->pBuffer +
- rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ,
- // FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
-
- // loop over all raster tiles in the current hot tile
- for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y)
- {
- uint8_t* pRasterTile = pRasterTileRow;
- for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x)
- {
- for (int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
- {
- ClearRasterTile<format>(pRasterTile, vClear);
- pRasterTile += rasterTileSampleStep;
- }
- }
- pRasterTileRow += macroTileRowStep;
- }
-
- pHotTile->state = HOTTILE_DIRTY;
-}
-
-void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData)
-{
- SWR_CONTEXT* pContext = pDC->pContext;
- HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
-
- if (KNOB_FAST_CLEAR)
- {
- CLEAR_DESC* pClear = (CLEAR_DESC*)pUserData;
- SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount;
- uint32_t numSamples = GetNumSamples(sampleCount);
-
- SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason.
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEClear, pDC->drawId);
-
- if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
- {
- unsigned long rt = 0;
- uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
- while (_BitScanForward(&rt, mask))
- {
- mask &= ~(1 << rt);
-
- HOTTILE* pHotTile =
- pContext->pHotTileMgr->GetHotTile(pContext,
- pDC,
- hWorkerPrivateData,
- macroTile,
- (SWR_RENDERTARGET_ATTACHMENT)rt,
- true,
- numSamples,
- pClear->renderTargetArrayIndex);
-
- // All we want to do here is to mark the hot tile as being in a "needs clear" state.
- pHotTile->clearData[0] = *(uint32_t*)&(pClear->clearRTColor[0]);
- pHotTile->clearData[1] = *(uint32_t*)&(pClear->clearRTColor[1]);
- pHotTile->clearData[2] = *(uint32_t*)&(pClear->clearRTColor[2]);
- pHotTile->clearData[3] = *(uint32_t*)&(pClear->clearRTColor[3]);
- pHotTile->state = HOTTILE_CLEAR;
- }
- }
-
- if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
- {
- HOTTILE* pHotTile = pContext->pHotTileMgr->GetHotTile(pContext,
- pDC,
- hWorkerPrivateData,
- macroTile,
- SWR_ATTACHMENT_DEPTH,
- true,
- numSamples,
- pClear->renderTargetArrayIndex);
- pHotTile->clearData[0] = *(uint32_t*)&pClear->clearDepth;
- pHotTile->state = HOTTILE_CLEAR;
- }
-
- if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
- {
- HOTTILE* pHotTile = pContext->pHotTileMgr->GetHotTile(pContext,
- pDC,
- hWorkerPrivateData,
- macroTile,
- SWR_ATTACHMENT_STENCIL,
- true,
- numSamples,
- pClear->renderTargetArrayIndex);
-
- pHotTile->clearData[0] = pClear->clearStencil;
- pHotTile->state = HOTTILE_CLEAR;
- }
-
- RDTSC_END(pDC->pContext->pBucketMgr, BEClear, 1);
- }
- else
- {
- // Legacy clear
- CLEAR_DESC* pClear = (CLEAR_DESC*)pUserData;
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEClear, pDC->drawId);
-
- if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
- {
- uint32_t clearData[4];
- clearData[0] = *(uint32_t*)&(pClear->clearRTColor[0]);
- clearData[1] = *(uint32_t*)&(pClear->clearRTColor[1]);
- clearData[2] = *(uint32_t*)&(pClear->clearRTColor[2]);
- clearData[3] = *(uint32_t*)&(pClear->clearRTColor[3]);
-
- PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT];
- SWR_ASSERT(pfnClearTiles != nullptr);
-
- unsigned long rt = 0;
- uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
- while (_BitScanForward(&rt, mask))
- {
- mask &= ~(1 << rt);
-
- pfnClearTiles(pDC,
- hWorkerPrivateData,
- (SWR_RENDERTARGET_ATTACHMENT)rt,
- macroTile,
- pClear->renderTargetArrayIndex,
- clearData,
- pClear->rect);
- }
- }
-
- if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
- {
- uint32_t clearData[4];
- clearData[0] = *(uint32_t*)&pClear->clearDepth;
- PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
- SWR_ASSERT(pfnClearTiles != nullptr);
-
- pfnClearTiles(pDC,
- hWorkerPrivateData,
- SWR_ATTACHMENT_DEPTH,
- macroTile,
- pClear->renderTargetArrayIndex,
- clearData,
- pClear->rect);
- }
-
- if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
- {
- uint32_t clearData[4];
- clearData[0] = pClear->clearStencil;
- PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
-
- pfnClearTiles(pDC,
- hWorkerPrivateData,
- SWR_ATTACHMENT_STENCIL,
- macroTile,
- pClear->renderTargetArrayIndex,
- clearData,
- pClear->rect);
- }
-
- RDTSC_END(pDC->pContext->pBucketMgr, BEClear, 1);
- }
-}
-
-void InitClearTilesTable()
-{
- memset(gClearTilesTable, 0, sizeof(gClearTilesTable));
-
- gClearTilesTable[R8G8B8A8_UNORM] = ClearMacroTile<R8G8B8A8_UNORM>;
- gClearTilesTable[B8G8R8A8_UNORM] = ClearMacroTile<B8G8R8A8_UNORM>;
- gClearTilesTable[R32_FLOAT] = ClearMacroTile<R32_FLOAT>;
- gClearTilesTable[R32G32B32A32_FLOAT] = ClearMacroTile<R32G32B32A32_FLOAT>;
- gClearTilesTable[R8_UINT] = ClearMacroTile<R8_UINT>;
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
deleted file mode 100644
index 868419c3e4f..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
+++ /dev/null
@@ -1,1300 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file backend.h
- *
- * @brief Backend handles rasterization, pixel shading and output merger
- * operations.
- *
- ******************************************************************************/
-#pragma once
-
-#include "tilemgr.h"
-#include "state.h"
-#include "context.h"
-
-
-void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COUNT][2][2]);
-void InitBackendSampleFuncTable(
- PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2]);
-
-static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs,
- SWR_PS_CONTEXT& psContext);
-
-
-enum SWR_BACKEND_FUNCS
-{
- SWR_BACKEND_SINGLE_SAMPLE,
- SWR_BACKEND_MSAA_PIXEL_RATE,
- SWR_BACKEND_MSAA_SAMPLE_RATE,
- SWR_BACKEND_FUNCS_MAX,
-};
-
-#if KNOB_SIMD_WIDTH == 8
-static const __m256 vCenterOffsetsX = __m256{0.5, 1.5, 0.5, 1.5, 2.5, 3.5, 2.5, 3.5};
-static const __m256 vCenterOffsetsY = __m256{0.5, 0.5, 1.5, 1.5, 0.5, 0.5, 1.5, 1.5};
-static const __m256 vULOffsetsX = __m256{0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0};
-static const __m256 vULOffsetsY = __m256{0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0};
-#define MASK 0xff
-#endif
-
-static INLINE simdmask ComputeUserClipMask(uint8_t clipMask,
- float* pUserClipBuffer,
- simdscalar const& vI,
- simdscalar const& vJ)
-{
- simdscalar vClipMask = _simd_setzero_ps();
- uint32_t numClipDistance = _mm_popcnt_u32(clipMask);
-
- for (uint32_t i = 0; i < numClipDistance; ++i)
- {
- // pull triangle clip distance values from clip buffer
- simdscalar vA = _simd_broadcast_ss(pUserClipBuffer++);
- simdscalar vB = _simd_broadcast_ss(pUserClipBuffer++);
- simdscalar vC = _simd_broadcast_ss(pUserClipBuffer++);
-
- // interpolate
- simdscalar vInterp = vplaneps(vA, vB, vC, vI, vJ);
-
- // clip if interpolated clip distance is < 0 || NAN
- simdscalar vCull = _simd_cmp_ps(_simd_setzero_ps(), vInterp, _CMP_NLE_UQ);
-
- vClipMask = _simd_or_ps(vClipMask, vCull);
- }
-
- return _simd_movemask_ps(vClipMask);
-}
-
-INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
-{
- static const uint32_t RasterTileColorOffsets[16]{
- 0,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8),
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 4,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 5,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 6,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 7,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 8,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 9,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
- 10,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
- 11,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
- 12,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
- 13,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
- 14,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
- 15,
- };
- assert(sampleNum < 16);
- return RasterTileColorOffsets[sampleNum];
-}
-
-INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
-{
- static const uint32_t RasterTileDepthOffsets[16]{
- 0,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8),
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 4,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 5,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 6,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 7,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 8,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 9,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
- 10,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
- 11,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
- 12,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
- 13,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
- 14,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
- 15,
- };
- assert(sampleNum < 16);
- return RasterTileDepthOffsets[sampleNum];
-}
-
-INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
-{
- static const uint32_t RasterTileStencilOffsets[16]{
- 0,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8),
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
- 2,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
- 3,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
- 4,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
- 5,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
- 6,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
- 7,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
- 8,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
- 9,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
- 10,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
- 11,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
- 12,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
- 13,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
- 14,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
- 15,
- };
- assert(sampleNum < 16);
- return RasterTileStencilOffsets[sampleNum];
-}
-
-template <typename T, uint32_t InputCoverage>
-struct generateInputCoverage
-{
- INLINE generateInputCoverage(const uint64_t* const coverageMask,
- uint32_t (&inputMask)[KNOB_SIMD_WIDTH],
- const uint32_t sampleMask)
- {
- // will need to update for avx512
- assert(KNOB_SIMD_WIDTH == 8);
-
- simdscalari mask[2];
- simdscalari sampleCoverage[2];
-
- if (T::bIsCenterPattern)
- {
- // center coverage is the same for all samples; just broadcast to the sample slots
- uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
- if (T::MultisampleT::numSamples == 1)
- {
- sampleCoverage[0] = _simd_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
- }
- else if (T::MultisampleT::numSamples == 2)
- {
- sampleCoverage[0] =
- _simd_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
- }
- else if (T::MultisampleT::numSamples == 4)
- {
- sampleCoverage[0] = _simd_set_epi32(
- 0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
- }
- else if (T::MultisampleT::numSamples == 8)
- {
- sampleCoverage[0] = _simd_set1_epi32(centerCoverage);
- }
- else if (T::MultisampleT::numSamples == 16)
- {
- sampleCoverage[0] = _simd_set1_epi32(centerCoverage);
- sampleCoverage[1] = _simd_set1_epi32(centerCoverage);
- }
- }
- else
- {
- simdscalari src = _simd_set1_epi32(0);
- simdscalari index0 = _simd_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), index1;
-
- if (T::MultisampleT::numSamples == 1)
- {
- mask[0] = _simd_set_epi32(0, 0, 0, 0, 0, 0, 0, -1);
- }
- else if (T::MultisampleT::numSamples == 2)
- {
- mask[0] = _simd_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
- }
- else if (T::MultisampleT::numSamples == 4)
- {
- mask[0] = _simd_set_epi32(0, 0, 0, 0, -1, -1, -1, -1);
- }
- else if (T::MultisampleT::numSamples == 8)
- {
- mask[0] = _simd_set1_epi32(-1);
- }
- else if (T::MultisampleT::numSamples == 16)
- {
- mask[0] = _simd_set1_epi32(-1);
- mask[1] = _simd_set1_epi32(-1);
- index1 = _simd_set_epi32(15, 14, 13, 12, 11, 10, 9, 8);
- }
-
- // gather coverage for samples 0-7
- sampleCoverage[0] =
- _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src),
- (const float*)coverageMask,
- index0,
- _mm256_castsi256_ps(mask[0]),
- 8));
- if (T::MultisampleT::numSamples > 8)
- {
- // gather coverage for samples 8-15
- sampleCoverage[1] =
- _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src),
- (const float*)coverageMask,
- index1,
- _mm256_castsi256_ps(mask[1]),
- 8));
- }
- }
-
- mask[0] = _mm256_set_epi8(-1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- 0xC,
- 0x8,
- 0x4,
- 0x0,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- 0xC,
- 0x8,
- 0x4,
- 0x0);
- // pull out the 8bit 4x2 coverage for samples 0-7 into the lower 32 bits of each 128bit lane
- simdscalari packedCoverage0 = _simd_shuffle_epi8(sampleCoverage[0], mask[0]);
-
- simdscalari packedCoverage1;
- if (T::MultisampleT::numSamples > 8)
- {
- // pull out the 8bit 4x2 coverage for samples 8-15 into the lower 32 bits of each 128bit
- // lane
- packedCoverage1 = _simd_shuffle_epi8(sampleCoverage[1], mask[0]);
- }
-
-#if (KNOB_ARCH == KNOB_ARCH_AVX)
- // pack lower 32 bits of each 128 bit lane into lower 64 bits of single 128 bit lane
- simdscalari hiToLow = _mm256_permute2f128_si256(packedCoverage0, packedCoverage0, 0x83);
- simdscalar shufRes = _mm256_shuffle_ps(
- _mm256_castsi256_ps(hiToLow), _mm256_castsi256_ps(hiToLow), _MM_SHUFFLE(1, 1, 0, 1));
- packedCoverage0 = _mm256_castps_si256(
- _mm256_blend_ps(_mm256_castsi256_ps(packedCoverage0), shufRes, 0xFE));
-
- simdscalari packedSampleCoverage;
- if (T::MultisampleT::numSamples > 8)
- {
- // pack lower 32 bits of each 128 bit lane into upper 64 bits of single 128 bit lane
- hiToLow = _mm256_permute2f128_si256(packedCoverage1, packedCoverage1, 0x83);
- shufRes = _mm256_shuffle_ps(_mm256_castsi256_ps(hiToLow),
- _mm256_castsi256_ps(hiToLow),
- _MM_SHUFFLE(1, 1, 0, 1));
- shufRes = _mm256_blend_ps(_mm256_castsi256_ps(packedCoverage1), shufRes, 0xFE);
- packedCoverage1 = _mm256_castps_si256(_mm256_castpd_ps(
- _mm256_shuffle_pd(_mm256_castps_pd(shufRes), _mm256_castps_pd(shufRes), 0x01)));
- packedSampleCoverage = _mm256_castps_si256(_mm256_blend_ps(
- _mm256_castsi256_ps(packedCoverage0), _mm256_castsi256_ps(packedCoverage1), 0xFC));
- }
- else
- {
- packedSampleCoverage = packedCoverage0;
- }
-#else
- simdscalari permMask = _simd_set_epi32(0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x4, 0x0);
- // pack lower 32 bits of each 128 bit lane into lower 64 bits of single 128 bit lane
- packedCoverage0 = _mm256_permutevar8x32_epi32(packedCoverage0, permMask);
-
- simdscalari packedSampleCoverage;
- if (T::MultisampleT::numSamples > 8)
- {
- permMask = _simd_set_epi32(0x7, 0x7, 0x7, 0x7, 0x4, 0x0, 0x7, 0x7);
- // pack lower 32 bits of each 128 bit lane into upper 64 bits of single 128 bit lane
- packedCoverage1 = _mm256_permutevar8x32_epi32(packedCoverage1, permMask);
-
- // blend coverage masks for samples 0-7 and samples 8-15 into single 128 bit lane
- packedSampleCoverage = _mm256_blend_epi32(packedCoverage0, packedCoverage1, 0x0C);
- }
- else
- {
- packedSampleCoverage = packedCoverage0;
- }
-#endif
-
- for (int32_t i = KNOB_SIMD_WIDTH - 1; i >= 0; i--)
- {
- // convert packed sample coverage masks into single coverage masks for all samples for
- // each pixel in the 4x2
- inputMask[i] = _simd_movemask_epi8(packedSampleCoverage);
-
- if (!T::bForcedSampleCount)
- {
- // input coverage has to be anded with sample mask if MSAA isn't forced on
- inputMask[i] &= sampleMask;
- }
-
- // shift to the next pixel in the 4x2
- packedSampleCoverage = _simd_slli_epi32(packedSampleCoverage, 1);
- }
- }
-
- INLINE generateInputCoverage(const uint64_t* const coverageMask,
- simdscalar& inputCoverage,
- const uint32_t sampleMask)
- {
- uint32_t inputMask[KNOB_SIMD_WIDTH];
- generateInputCoverage<T, T::InputCoverage>(coverageMask, inputMask, sampleMask);
- inputCoverage = _simd_castsi_ps(_simd_set_epi32(inputMask[7],
- inputMask[6],
- inputMask[5],
- inputMask[4],
- inputMask[3],
- inputMask[2],
- inputMask[1],
- inputMask[0]));
- }
-};
-
-template <typename T>
-struct generateInputCoverage<T, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>
-{
- INLINE generateInputCoverage(const uint64_t* const coverageMask,
- simdscalar& inputCoverage,
- const uint32_t sampleMask)
- {
- // will need to update for avx512
- assert(KNOB_SIMD_WIDTH == 8);
- simdscalari vec = _simd_set1_epi32(coverageMask[0]);
- const simdscalari bit = _simd_set_epi32(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
- vec = _simd_and_si(vec, bit);
- vec = _simd_cmplt_epi32(_simd_setzero_si(), vec);
- vec = _simd_blendv_epi32(_simd_setzero_si(), _simd_set1_epi32(1), vec);
- inputCoverage = _simd_castsi_ps(vec);
- }
-
- INLINE generateInputCoverage(const uint64_t* const coverageMask,
- uint32_t (&inputMask)[KNOB_SIMD_WIDTH],
- const uint32_t sampleMask)
- {
- uint32_t simdCoverage = (coverageMask[0] & MASK);
- static const uint32_t FullCoverageMask = (1 << T::MultisampleT::numSamples) - 1;
- for (int i = 0; i < KNOB_SIMD_WIDTH; i++)
- {
- // set all samples to covered if conservative coverage mask is set for that pixel
- inputMask[i] = (((1 << i) & simdCoverage) > 0) ? FullCoverageMask : 0;
- }
- }
-};
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// Centroid behaves exactly as follows :
-// (1) If all samples in the primitive are covered, the attribute is evaluated at the pixel center
-// (even if the sample pattern does not happen to
-// have a sample location there).
-// (2) Else the attribute is evaluated at the first covered sample, in increasing order of sample
-// index, where sample coverage is after ANDing the
-// coverage with the SampleMask Rasterizer State.
-// (3) If no samples are covered, such as on helper pixels executed off the bounds of a primitive to
-// fill out 2x2 pixel stamps, the attribute is
-// evaluated as follows : If the SampleMask Rasterizer state is a subset of the samples in the
-// pixel, then the first sample covered by the SampleMask Rasterizer State is the evaluation
-// point.Otherwise (full SampleMask), the pixel center is the evaluation point.
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-template <typename T>
-INLINE void CalcCentroidPos(SWR_PS_CONTEXT& psContext,
- const SWR_MULTISAMPLE_POS& samplePos,
- const uint64_t* const coverageMask,
- const uint32_t sampleMask,
- simdscalar const& vXSamplePosUL,
- simdscalar const& vYSamplePosUL)
-{
- uint32_t inputMask[KNOB_SIMD_WIDTH];
- generateInputCoverage<T, T::InputCoverage>(coverageMask, inputMask, sampleMask);
-
- // Case (2) - partially covered pixel
-
- // scan for first covered sample per pixel in the 4x2 span
- unsigned long sampleNum[KNOB_SIMD_WIDTH];
- (inputMask[0] > 0) ? (_BitScanForward(&sampleNum[0], inputMask[0])) : (sampleNum[0] = 0);
- (inputMask[1] > 0) ? (_BitScanForward(&sampleNum[1], inputMask[1])) : (sampleNum[1] = 0);
- (inputMask[2] > 0) ? (_BitScanForward(&sampleNum[2], inputMask[2])) : (sampleNum[2] = 0);
- (inputMask[3] > 0) ? (_BitScanForward(&sampleNum[3], inputMask[3])) : (sampleNum[3] = 0);
- (inputMask[4] > 0) ? (_BitScanForward(&sampleNum[4], inputMask[4])) : (sampleNum[4] = 0);
- (inputMask[5] > 0) ? (_BitScanForward(&sampleNum[5], inputMask[5])) : (sampleNum[5] = 0);
- (inputMask[6] > 0) ? (_BitScanForward(&sampleNum[6], inputMask[6])) : (sampleNum[6] = 0);
- (inputMask[7] > 0) ? (_BitScanForward(&sampleNum[7], inputMask[7])) : (sampleNum[7] = 0);
-
- // look up and set the sample offsets from UL pixel corner for first covered sample
- simdscalar vXSample = _simd_set_ps(samplePos.X(sampleNum[7]),
- samplePos.X(sampleNum[6]),
- samplePos.X(sampleNum[5]),
- samplePos.X(sampleNum[4]),
- samplePos.X(sampleNum[3]),
- samplePos.X(sampleNum[2]),
- samplePos.X(sampleNum[1]),
- samplePos.X(sampleNum[0]));
-
- simdscalar vYSample = _simd_set_ps(samplePos.Y(sampleNum[7]),
- samplePos.Y(sampleNum[6]),
- samplePos.Y(sampleNum[5]),
- samplePos.Y(sampleNum[4]),
- samplePos.Y(sampleNum[3]),
- samplePos.Y(sampleNum[2]),
- samplePos.Y(sampleNum[1]),
- samplePos.Y(sampleNum[0]));
- // add sample offset to UL pixel corner
- vXSample = _simd_add_ps(vXSamplePosUL, vXSample);
- vYSample = _simd_add_ps(vYSamplePosUL, vYSample);
-
- // Case (1) and case (3b) - All samples covered or not covered with full SampleMask
- static const simdscalari vFullyCoveredMask = T::MultisampleT::FullSampleMask();
- simdscalari vInputCoveragei = _simd_set_epi32(inputMask[7],
- inputMask[6],
- inputMask[5],
- inputMask[4],
- inputMask[3],
- inputMask[2],
- inputMask[1],
- inputMask[0]);
- simdscalari vAllSamplesCovered = _simd_cmpeq_epi32(vInputCoveragei, vFullyCoveredMask);
-
- static const simdscalari vZero = _simd_setzero_si();
- const simdscalari vSampleMask = _simd_and_si(_simd_set1_epi32(sampleMask), vFullyCoveredMask);
- simdscalari vNoSamplesCovered = _simd_cmpeq_epi32(vInputCoveragei, vZero);
- simdscalari vIsFullSampleMask = _simd_cmpeq_epi32(vSampleMask, vFullyCoveredMask);
- simdscalari vCase3b = _simd_and_si(vNoSamplesCovered, vIsFullSampleMask);
-
- simdscalari vEvalAtCenter = _simd_or_si(vAllSamplesCovered, vCase3b);
-
- // set the centroid position based on results from above
- psContext.vX.centroid =
- _simd_blendv_ps(vXSample, psContext.vX.center, _simd_castsi_ps(vEvalAtCenter));
- psContext.vY.centroid =
- _simd_blendv_ps(vYSample, psContext.vY.center, _simd_castsi_ps(vEvalAtCenter));
-
- // Case (3a) No samples covered and partial sample mask
- simdscalari vSomeSampleMaskSamples = _simd_cmplt_epi32(vSampleMask, vFullyCoveredMask);
- // sample mask should never be all 0's for this case, but handle it anyways
- unsigned long firstCoveredSampleMaskSample = 0;
- (sampleMask > 0) ? (_BitScanForward(&firstCoveredSampleMaskSample, sampleMask))
- : (firstCoveredSampleMaskSample = 0);
-
- simdscalari vCase3a = _simd_and_si(vNoSamplesCovered, vSomeSampleMaskSamples);
-
- vXSample = _simd_set1_ps(samplePos.X(firstCoveredSampleMaskSample));
- vYSample = _simd_set1_ps(samplePos.Y(firstCoveredSampleMaskSample));
-
- // blend in case 3a pixel locations
- psContext.vX.centroid =
- _simd_blendv_ps(psContext.vX.centroid, vXSample, _simd_castsi_ps(vCase3a));
- psContext.vY.centroid =
- _simd_blendv_ps(psContext.vY.centroid, vYSample, _simd_castsi_ps(vCase3a));
-}
-
-INLINE void CalcCentroidBarycentrics(const BarycentricCoeffs& coeffs,
- SWR_PS_CONTEXT& psContext,
- const simdscalar& vXSamplePosUL,
- const simdscalar& vYSamplePosUL)
-{
- // evaluate I,J
- psContext.vI.centroid =
- vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.centroid, psContext.vY.centroid);
- psContext.vJ.centroid =
- vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.centroid, psContext.vY.centroid);
- psContext.vI.centroid = _simd_mul_ps(psContext.vI.centroid, coeffs.vRecipDet);
- psContext.vJ.centroid = _simd_mul_ps(psContext.vJ.centroid, coeffs.vRecipDet);
-
- // interpolate 1/w
- psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW,
- coeffs.vBOneOverW,
- coeffs.vCOneOverW,
- psContext.vI.centroid,
- psContext.vJ.centroid);
-}
-
-INLINE simdmask CalcDepthBoundsAcceptMask(simdscalar const& z, float minz, float maxz)
-{
- const simdscalar minzMask = _simd_cmpge_ps(z, _simd_set1_ps(minz));
- const simdscalar maxzMask = _simd_cmple_ps(z, _simd_set1_ps(maxz));
-
- return _simd_movemask_ps(_simd_and_ps(minzMask, maxzMask));
-}
-
-template <typename T>
-INLINE uint32_t GetNumOMSamples(SWR_MULTISAMPLE_COUNT blendSampleCount)
-{
- // RT has to be single sample if we're in forcedMSAA mode
- if (T::bForcedSampleCount && (T::MultisampleT::sampleCount > SWR_MULTISAMPLE_1X))
- {
- return 1;
- }
- // unless we're forced to single sample, in which case we run the OM at the sample count of the
- // RT
- else if (T::bForcedSampleCount && (T::MultisampleT::sampleCount == SWR_MULTISAMPLE_1X))
- {
- return GetNumSamples(blendSampleCount);
- }
- // else we're in normal MSAA mode and rasterizer and OM are running at the same sample count
- else
- {
- return T::MultisampleT::numSamples;
- }
-}
-
-inline void SetupBarycentricCoeffs(BarycentricCoeffs* coeffs, const SWR_TRIANGLE_DESC& work)
-{
- // broadcast scalars
-
- coeffs->vIa = _simd_broadcast_ss(&work.I[0]);
- coeffs->vIb = _simd_broadcast_ss(&work.I[1]);
- coeffs->vIc = _simd_broadcast_ss(&work.I[2]);
-
- coeffs->vJa = _simd_broadcast_ss(&work.J[0]);
- coeffs->vJb = _simd_broadcast_ss(&work.J[1]);
- coeffs->vJc = _simd_broadcast_ss(&work.J[2]);
-
- coeffs->vZa = _simd_broadcast_ss(&work.Z[0]);
- coeffs->vZb = _simd_broadcast_ss(&work.Z[1]);
- coeffs->vZc = _simd_broadcast_ss(&work.Z[2]);
-
- coeffs->vRecipDet = _simd_broadcast_ss(&work.recipDet);
-
- coeffs->vAOneOverW = _simd_broadcast_ss(&work.OneOverW[0]);
- coeffs->vBOneOverW = _simd_broadcast_ss(&work.OneOverW[1]);
- coeffs->vCOneOverW = _simd_broadcast_ss(&work.OneOverW[2]);
-}
-
-inline void SetupRenderBuffers(uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS],
- uint8_t** pDepthBuffer,
- uint8_t** pStencilBuffer,
- uint32_t colorHotTileMask,
- RenderOutputBuffers& renderBuffers)
-{
- unsigned long index;
- while (_BitScanForward(&index, colorHotTileMask))
- {
- assert(index < SWR_NUM_RENDERTARGETS);
- colorHotTileMask &= ~(1 << index);
- pColorBuffer[index] = renderBuffers.pColor[index];
- }
-
- if (pDepthBuffer)
- {
- *pDepthBuffer = renderBuffers.pDepth;
- }
-
- if (pStencilBuffer)
- {
- *pStencilBuffer = renderBuffers.pStencil;
- ;
- }
-}
-
-INLINE void SetRenderHotTilesDirty(DRAW_CONTEXT* pDC, RenderOutputBuffers& renderBuffers)
-{
- const API_STATE& state = GetApiState(pDC);
-
- unsigned long rtSlot = 0;
- uint32_t colorHottileEnableMask = state.colorHottileEnable;
- while (_BitScanForward(&rtSlot, colorHottileEnableMask))
- {
- colorHottileEnableMask &= ~(1 << rtSlot);
- renderBuffers.pColorHotTile[rtSlot]->state = HOTTILE_DIRTY;
- }
-}
-
-template <typename T>
-void SetupPixelShaderContext(SWR_PS_CONTEXT* psContext,
- const SWR_MULTISAMPLE_POS& samplePos,
- SWR_TRIANGLE_DESC& work)
-{
- psContext->pAttribs = work.pAttribs;
- psContext->pPerspAttribs = work.pPerspAttribs;
- psContext->frontFace = work.triFlags.frontFacing;
- psContext->renderTargetArrayIndex = work.triFlags.renderTargetArrayIndex;
- psContext->viewportIndex = work.triFlags.viewportIndex;
-
- // save Ia/Ib/Ic and Ja/Jb/Jc if we need to reevaluate i/j/k in the shader because of pull
- // attribs
- psContext->I = work.I;
- psContext->J = work.J;
-
- psContext->recipDet = work.recipDet;
- psContext->pRecipW = work.pRecipW;
- psContext->pSamplePosX =
- samplePos.X(); // reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
- psContext->pSamplePosY =
- samplePos.Y(); // reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
- psContext->rasterizerSampleCount = T::MultisampleT::numSamples;
- psContext->sampleIndex = 0;
-}
-
-template <typename T, bool IsSingleSample>
-void CalcCentroid(SWR_PS_CONTEXT* psContext,
- const SWR_MULTISAMPLE_POS& samplePos,
- const BarycentricCoeffs& coeffs,
- const uint64_t* const coverageMask,
- uint32_t sampleMask)
-{
- if (IsSingleSample) // if (T::MultisampleT::numSamples == 1) // doesn't cut it, the centroid
- // positions are still different
- {
- // for 1x case, centroid is pixel center
- psContext->vX.centroid = psContext->vX.center;
- psContext->vY.centroid = psContext->vY.center;
- psContext->vI.centroid = psContext->vI.center;
- psContext->vJ.centroid = psContext->vJ.center;
- psContext->vOneOverW.centroid = psContext->vOneOverW.center;
- }
- else
- {
- if (T::bCentroidPos)
- {
- ///@ todo: don't need to genererate input coverage 2x if input coverage and centroid
- if (T::bIsCenterPattern)
- {
- psContext->vX.centroid = _simd_add_ps(psContext->vX.UL, _simd_set1_ps(0.5f));
- psContext->vY.centroid = _simd_add_ps(psContext->vY.UL, _simd_set1_ps(0.5f));
- }
- else
- {
- // add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate
- // coverage 2X'..
- CalcCentroidPos<T>(*psContext,
- samplePos,
- coverageMask,
- sampleMask,
- psContext->vX.UL,
- psContext->vY.UL);
- }
-
- CalcCentroidBarycentrics(coeffs, *psContext, psContext->vX.UL, psContext->vY.UL);
- }
- else
- {
- psContext->vX.centroid = psContext->vX.sample;
- psContext->vY.centroid = psContext->vY.sample;
- }
- }
-}
-
-template <typename T>
-struct PixelRateZTestLoop
-{
- PixelRateZTestLoop(DRAW_CONTEXT* DC,
- uint32_t _workerId,
- const SWR_TRIANGLE_DESC& Work,
- const BarycentricCoeffs& Coeffs,
- const API_STATE& apiState,
- uint8_t*& depthBuffer,
- uint8_t*& stencilBuffer,
- const uint8_t ClipDistanceMask) :
- pDC(DC),
- workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
- samplePos(state.rastState.samplePositions), clipDistanceMask(ClipDistanceMask),
- pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer){};
-
- INLINE
- uint32_t operator()(simdscalar& activeLanes,
- SWR_PS_CONTEXT& psContext,
- const CORE_BUCKETS BEDepthBucket,
- uint32_t currentSimdIn8x8 = 0)
- {
-
- uint32_t statCount = 0;
- simdscalar anyDepthSamplePassed = _simd_setzero_ps();
- for (uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
- {
- const uint8_t* pCoverageMask = (uint8_t*)&work.coverageMask[sample];
- vCoverageMask[sample] =
- _simd_and_ps(activeLanes, _simd_vmask_ps(pCoverageMask[currentSimdIn8x8] & MASK));
-
- if (!_simd_movemask_ps(vCoverageMask[sample]))
- {
- vCoverageMask[sample] = depthPassMask[sample] = stencilPassMask[sample] =
- _simd_setzero_ps();
- continue;
- }
-
- // offset depth/stencil buffers current sample
- uint8_t* pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
- uint8_t* pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
-
- if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
- {
- static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT,
- "Unsupported depth hot tile format");
-
- const simdscalar z = _simd_load_ps(reinterpret_cast<const float*>(pDepthSample));
-
- const float minz = state.depthBoundsState.depthBoundsTestMinValue;
- const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
-
- vCoverageMask[sample] =
- _simd_and_ps(vCoverageMask[sample],
- _simd_vmask_ps(CalcDepthBoundsAcceptMask(z, minz, maxz)));
- }
-
- RDTSC_BEGIN(psContext.pBucketManager, BEBarycentric, pDC->drawId);
-
- // calculate per sample positions
- psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
- psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
-
- // calc I & J per sample
- CalcSampleBarycentrics(coeffs, psContext);
-
- if (psState.writesODepth)
- {
- {
- // broadcast and test oDepth(psContext.vZ) written from the PS for each sample
- vZ[sample] = psContext.vZ;
- }
- }
- else
- {
- vZ[sample] = vplaneps(
- coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
- vZ[sample] = state.pfnQuantizeDepth(vZ[sample]);
- }
-
- RDTSC_END(psContext.pBucketManager, BEBarycentric, 0);
-
- ///@todo: perspective correct vs non-perspective correct clipping?
- // if clip distances are enabled, we need to interpolate for each sample
- if (clipDistanceMask)
- {
- uint8_t clipMask = ComputeUserClipMask(clipDistanceMask,
- work.pUserClipBuffer,
- psContext.vI.sample,
- psContext.vJ.sample);
-
- vCoverageMask[sample] =
- _simd_and_ps(vCoverageMask[sample], _simd_vmask_ps(~clipMask));
- }
-
- // ZTest for this sample
- ///@todo Need to uncomment out this bucket.
- // RDTSC_BEGIN(psContext.pBucketManager, BEDepthBucket, pDC->drawId);
- depthPassMask[sample] = vCoverageMask[sample];
- stencilPassMask[sample] = vCoverageMask[sample];
- depthPassMask[sample] = DepthStencilTest(&state,
- work.triFlags.frontFacing,
- work.triFlags.viewportIndex,
- vZ[sample],
- pDepthSample,
- vCoverageMask[sample],
- pStencilSample,
- &stencilPassMask[sample]);
- // RDTSC_END(psContext.pBucketManager, BEDepthBucket, 0);
-
- // early-exit if no pixels passed depth or earlyZ is forced on
- if (psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample]))
- {
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
- &state.depthStencilState,
- work.triFlags.frontFacing,
- vZ[sample],
- pDepthSample,
- depthPassMask[sample],
- vCoverageMask[sample],
- pStencilSample,
- stencilPassMask[sample]);
-
- if (!_simd_movemask_ps(depthPassMask[sample]))
- {
- continue;
- }
- }
- anyDepthSamplePassed = _simd_or_ps(anyDepthSamplePassed, depthPassMask[sample]);
- uint32_t statMask = _simd_movemask_ps(depthPassMask[sample]);
- statCount += _mm_popcnt_u32(statMask);
- }
-
- activeLanes = _simd_and_ps(anyDepthSamplePassed, activeLanes);
- // return number of samples that passed depth and coverage
- return statCount;
- }
-
- // saved depth/stencil/coverage masks and interpolated Z used in OM and DepthWrite
- simdscalar vZ[T::MultisampleT::numCoverageSamples];
- simdscalar vCoverageMask[T::MultisampleT::numCoverageSamples];
- simdscalar depthPassMask[T::MultisampleT::numCoverageSamples];
- simdscalar stencilPassMask[T::MultisampleT::numCoverageSamples];
-
-private:
- // functor inputs
- DRAW_CONTEXT* pDC;
- uint32_t workerId;
-
- const SWR_TRIANGLE_DESC& work;
- const BarycentricCoeffs& coeffs;
- const API_STATE& state;
- const SWR_PS_STATE& psState;
- const SWR_MULTISAMPLE_POS& samplePos;
- const uint8_t clipDistanceMask;
- uint8_t*& pDepthBuffer;
- uint8_t*& pStencilBuffer;
-};
-
-INLINE void CalcPixelBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT& psContext)
-{
- // evaluate I,J
- psContext.vI.center =
- vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.center, psContext.vY.center);
- psContext.vJ.center =
- vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.center, psContext.vY.center);
- psContext.vI.center = _simd_mul_ps(psContext.vI.center, coeffs.vRecipDet);
- psContext.vJ.center = _simd_mul_ps(psContext.vJ.center, coeffs.vRecipDet);
-
- // interpolate 1/w
- psContext.vOneOverW.center = vplaneps(coeffs.vAOneOverW,
- coeffs.vBOneOverW,
- coeffs.vCOneOverW,
- psContext.vI.center,
- psContext.vJ.center);
-}
-
-static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs,
- SWR_PS_CONTEXT& psContext)
-{
- // evaluate I,J
- psContext.vI.sample =
- vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.sample, psContext.vY.sample);
- psContext.vJ.sample =
- vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.sample, psContext.vY.sample);
- psContext.vI.sample = _simd_mul_ps(psContext.vI.sample, coeffs.vRecipDet);
- psContext.vJ.sample = _simd_mul_ps(psContext.vJ.sample, coeffs.vRecipDet);
-
- // interpolate 1/w
- psContext.vOneOverW.sample = vplaneps(coeffs.vAOneOverW,
- coeffs.vBOneOverW,
- coeffs.vCOneOverW,
- psContext.vI.sample,
- psContext.vJ.sample);
-}
-
-// Merge Output to 8x2 SIMD16 Tile Format
-INLINE void OutputMerger8x2(DRAW_CONTEXT* pDC,
- SWR_PS_CONTEXT& psContext,
- uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS],
- uint32_t sample,
- const SWR_BLEND_STATE* pBlendState,
- const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS],
- simdscalar& coverageMask,
- simdscalar const& depthPassMask,
- uint32_t renderTargetMask,
- bool useAlternateOffset,
- uint32_t workerId)
-{
- // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
- uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
-
- if (useAlternateOffset)
- {
- rasterTileColorOffset += sizeof(simdscalar);
- }
-
- simdvector blendSrc;
- simdvector blendOut;
-
- unsigned long rt;
- while (_BitScanForward(&rt, renderTargetMask))
- {
- renderTargetMask &= ~(1 << rt);
-
- const SWR_RENDER_TARGET_BLEND_STATE* pRTBlend = &pBlendState->renderTarget[rt];
-
- simdscalar* pColorSample;
- bool hotTileEnable = !pRTBlend->writeDisableAlpha || !pRTBlend->writeDisableRed ||
- !pRTBlend->writeDisableGreen || !pRTBlend->writeDisableBlue;
- if (hotTileEnable)
- {
- pColorSample = reinterpret_cast<simdscalar*>(pColorBase[rt] + rasterTileColorOffset);
- blendSrc[0] = pColorSample[0];
- blendSrc[1] = pColorSample[2];
- blendSrc[2] = pColorSample[4];
- blendSrc[3] = pColorSample[6];
- }
- else
- {
- pColorSample = nullptr;
- }
-
- SWR_BLEND_CONTEXT blendContext = {0};
- {
- // pfnBlendFunc may not update all channels. Initialize with PS output.
- /// TODO: move this into the blend JIT.
- blendOut = psContext.shaded[rt];
-
- blendContext.pBlendState = pBlendState;
- blendContext.src = &psContext.shaded[rt];
- blendContext.src1 = &psContext.shaded[1];
- blendContext.src0alpha = reinterpret_cast<simdvector*>(&psContext.shaded[0].w);
- blendContext.sampleNum = sample;
- blendContext.pDst = &blendSrc;
- blendContext.result = &blendOut;
- blendContext.oMask = &psContext.oMask;
- blendContext.pMask = reinterpret_cast<simdscalari*>(&coverageMask);
-
- // Blend outputs and update coverage mask for alpha test
- if (pfnBlendFunc[rt] != nullptr)
- {
- pfnBlendFunc[rt](&blendContext);
- }
- }
-
- // Track alpha events
- AR_EVENT(
- AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
-
- // final write mask
- simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask));
-
- ///@todo can only use maskstore fast path if bpc is 32. Assuming hot tile is RGBA32_FLOAT.
- static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT,
- "Unsupported hot tile format");
-
- // store with color mask
- if (!pRTBlend->writeDisableRed)
- {
- _simd_maskstore_ps(reinterpret_cast<float*>(&pColorSample[0]), outputMask, blendOut.x);
- }
- if (!pRTBlend->writeDisableGreen)
- {
- _simd_maskstore_ps(reinterpret_cast<float*>(&pColorSample[2]), outputMask, blendOut.y);
- }
- if (!pRTBlend->writeDisableBlue)
- {
- _simd_maskstore_ps(reinterpret_cast<float*>(&pColorSample[4]), outputMask, blendOut.z);
- }
- if (!pRTBlend->writeDisableAlpha)
- {
- _simd_maskstore_ps(reinterpret_cast<float*>(&pColorSample[6]), outputMask, blendOut.w);
- }
- }
-}
-
-template <typename T>
-void BackendPixelRate(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t x,
- uint32_t y,
- SWR_TRIANGLE_DESC& work,
- RenderOutputBuffers& renderBuffers)
-{
- ///@todo: Need to move locals off stack to prevent __chkstk's from being generated for the
- /// backend
-
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelRateBackend, pDC->drawId);
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId);
-
- const API_STATE& state = GetApiState(pDC);
-
- BarycentricCoeffs coeffs;
- SetupBarycentricCoeffs(&coeffs, work);
-
- SWR_CONTEXT* pContext = pDC->pContext;
- void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
-
- SWR_PS_CONTEXT psContext;
- const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
- SetupPixelShaderContext<T>(&psContext, samplePos, work);
-
- uint8_t *pDepthBuffer, *pStencilBuffer;
- SetupRenderBuffers(psContext.pColorBuffer,
- &pDepthBuffer,
- &pStencilBuffer,
- state.colorHottileEnable,
- renderBuffers);
-
- bool isTileDirty = false;
-
- RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0);
-
- PixelRateZTestLoop<T> PixelRateZTest(pDC,
- workerId,
- work,
- coeffs,
- state,
- pDepthBuffer,
- pStencilBuffer,
- state.backendState.clipDistanceMask);
-
- psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
- psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
-
- const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
-
- for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
- {
- psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
- psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
-
- const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
-
- for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
- {
- const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
-
-
- simdscalar activeLanes;
- if (!(work.anyCoveredSamples & MASK))
- {
- goto Endtile;
- };
- activeLanes = _simd_vmask_ps(work.anyCoveredSamples & MASK);
-
- if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
- {
- const uint64_t* pCoverageMask =
- (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
- ? &work.innerCoverageMask
- : &work.coverageMask[0];
-
- generateInputCoverage<T, T::InputCoverage>(
- pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
- }
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
-
- CalcPixelBarycentrics(coeffs, psContext);
-
- CalcCentroid<T, false>(
- &psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
-
- RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
-
- if (T::bForcedSampleCount)
- {
- // candidate pixels (that passed coverage) will cause shader invocation if any bits
- // in the samplemask are set
- const simdscalar vSampleMask = _simd_castsi_ps(_simd_cmpgt_epi32(
- _simd_set1_epi32(state.blendState.sampleMask), _simd_setzero_si()));
- activeLanes = _simd_and_ps(activeLanes, vSampleMask);
- }
-
- // Early-Z?
- if (T::bCanEarlyZ && !T::bForcedSampleCount)
- {
- uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BEEarlyDepthTest);
- UPDATE_STAT_BE(DepthPassCount, depthPassCount);
- AR_EVENT(EarlyDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
- }
-
- // if we have no covered samples that passed depth at this point, go to next tile
- if (!_simd_movemask_ps(activeLanes))
- {
- goto Endtile;
- };
-
- if (state.psState.usesSourceDepth)
- {
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
- // interpolate and quantize z
- psContext.vZ = vplaneps(
- coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
- psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
- RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
- }
-
- // pixels that are currently active
- psContext.activeMask = _simd_castps_si(activeLanes);
- psContext.oMask = T::MultisampleT::FullSampleMask();
-
- // execute pixel shader
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelShader, pDC->drawId);
- state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
- RDTSC_END(pDC->pContext->pBucketMgr, BEPixelShader, 0);
-
- // update stats
- UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
- AR_EVENT(PSStats((HANDLE)&psContext.stats));
-
- // update active lanes to remove any discarded or oMask'd pixels
- activeLanes = _simd_castsi_ps(_simd_and_si(
- psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
- if (!_simd_movemask_ps(activeLanes))
- {
- goto Endtile;
- };
-
- isTileDirty = true;
-
- // late-Z
- if (!T::bCanEarlyZ && !T::bForcedSampleCount)
- {
- uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BELateDepthTest);
- UPDATE_STAT_BE(DepthPassCount, depthPassCount);
- AR_EVENT(LateDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
- }
-
- // if we have no covered samples that passed depth at this point, skip OM and go to next
- // tile
- if (!_simd_movemask_ps(activeLanes))
- {
- goto Endtile;
- };
-
- // output merger
- // loop over all samples, broadcasting the results of the PS to all passing pixels
- for (uint32_t sample = 0; sample < GetNumOMSamples<T>(state.blendState.sampleCount);
- sample++)
- {
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEOutputMerger, pDC->drawId);
- // center pattern does a single coverage/depth/stencil test, standard pattern tests
- // all samples
- uint32_t coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample;
- simdscalar coverageMask, depthMask;
- if (T::bForcedSampleCount)
- {
- coverageMask = depthMask = activeLanes;
- }
- else
- {
- coverageMask = PixelRateZTest.vCoverageMask[coverageSampleNum];
- depthMask = PixelRateZTest.depthPassMask[coverageSampleNum];
- if (!_simd_movemask_ps(depthMask))
- {
- // stencil should already have been written in early/lateZ tests
- RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0);
- continue;
- }
- }
-
- // broadcast the results of the PS to all passing pixels
-
- OutputMerger8x2(pDC,
- psContext,
- psContext.pColorBuffer,
- sample,
- &state.blendState,
- state.pfnBlendFunc,
- coverageMask,
- depthMask,
- state.psState.renderTargetMask,
- useAlternateOffset,
- workerId);
-
-
- if (!state.psState.forceEarlyZ && !T::bForcedSampleCount)
- {
- uint8_t* pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
- uint8_t* pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
-
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
- &state.depthStencilState,
- work.triFlags.frontFacing,
- PixelRateZTest.vZ[coverageSampleNum],
- pDepthSample,
- depthMask,
- coverageMask,
- pStencilSample,
- PixelRateZTest.stencilPassMask[coverageSampleNum]);
- }
- RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0);
- }
- Endtile:
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEndTile, pDC->drawId);
-
- for (uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
- {
- work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
- }
-
- if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
- {
- work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
- }
- work.anyCoveredSamples >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
-
- if (useAlternateOffset)
- {
- unsigned long rt;
- uint32_t rtMask = state.colorHottileEnable;
- while (_BitScanForward(&rt, rtMask))
- {
- rtMask &= ~(1 << rt);
- psContext.pColorBuffer[rt] +=
- (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
- }
- }
-
- pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
- pStencilBuffer +=
- (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
-
- RDTSC_END(pDC->pContext->pBucketMgr, BEEndTile, 0);
-
- psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
- psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
- }
-
- psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
- psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
- }
-
- if (isTileDirty)
- {
- SetRenderHotTilesDirty(pDC, renderBuffers);
- }
-
- RDTSC_END(pDC->pContext->pBucketMgr, BEPixelRateBackend, 0);
-}
-
-template <uint32_t sampleCountT = SWR_MULTISAMPLE_1X,
- uint32_t isCenter = 0,
- uint32_t coverage = 0,
- uint32_t centroid = 0,
- uint32_t forced = 0,
- uint32_t canEarlyZ = 0
- >
-struct SwrBackendTraits
-{
- static const bool bIsCenterPattern = (isCenter == 1);
- static const uint32_t InputCoverage = coverage;
- static const bool bCentroidPos = (centroid == 1);
- static const bool bForcedSampleCount = (forced == 1);
- static const bool bCanEarlyZ = (canEarlyZ == 1);
- typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, bIsCenterPattern> MultisampleT;
-};
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
deleted file mode 100644
index 7881d36ddb9..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
+++ /dev/null
@@ -1,454 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file backend.cpp
- *
- * @brief Backend handles rasterization, pixel shading and output merger
- * operations.
- *
- ******************************************************************************/
-
-#include <smmintrin.h>
-
-#include "backend.h"
-#include "backend_impl.h"
-#include "tilemgr.h"
-#include "memory/tilingtraits.h"
-#include "core/multisample.h"
-
-#include <algorithm>
-
-template <typename T>
-void BackendSampleRate(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t x,
- uint32_t y,
- SWR_TRIANGLE_DESC& work,
- RenderOutputBuffers& renderBuffers)
-{
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESampleRateBackend, pDC->drawId);
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId);
-
- void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
- const API_STATE& state = GetApiState(pDC);
-
- BarycentricCoeffs coeffs;
- SetupBarycentricCoeffs(&coeffs, work);
-
- SWR_PS_CONTEXT psContext;
- const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
- SetupPixelShaderContext<T>(&psContext, samplePos, work);
-
- uint8_t *pDepthBuffer, *pStencilBuffer;
- SetupRenderBuffers(psContext.pColorBuffer,
- &pDepthBuffer,
- &pStencilBuffer,
- state.colorHottileEnable,
- renderBuffers);
-
- bool isTileDirty = false;
-
- RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0);
-
- psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
- psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
-
- const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
-
- for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
- {
- psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
- psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
-
- const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
-
- for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
- {
- const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
-
-
- if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
- {
- const uint64_t* pCoverageMask =
- (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
- ? &work.innerCoverageMask
- : &work.coverageMask[0];
-
- generateInputCoverage<T, T::InputCoverage>(
- pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
- }
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
-
- CalcPixelBarycentrics(coeffs, psContext);
-
- CalcCentroid<T, false>(
- &psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
-
- RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
-
- for (uint32_t sample = 0; sample < T::MultisampleT::numSamples; sample++)
- {
- simdmask coverageMask = work.coverageMask[sample] & MASK;
-
- if (coverageMask)
- {
- // offset depth/stencil buffers current sample
- uint8_t* pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
- uint8_t* pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
-
- if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
- {
- static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT,
- "Unsupported depth hot tile format");
-
- const simdscalar z =
- _simd_load_ps(reinterpret_cast<const float*>(pDepthSample));
-
- const float minz = state.depthBoundsState.depthBoundsTestMinValue;
- const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
-
- coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
- }
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
-
- // calculate per sample positions
- psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
- psContext.vY.sample = _simd_add_ps(psContext.vY.UL, samplePos.vY(sample));
-
- CalcSampleBarycentrics(coeffs, psContext);
-
- // interpolate and quantize z
- psContext.vZ = vplaneps(coeffs.vZa,
- coeffs.vZb,
- coeffs.vZc,
- psContext.vI.sample,
- psContext.vJ.sample);
- psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
-
- RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
-
- // interpolate user clip distance if available
- if (state.backendState.clipDistanceMask)
- {
- coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask,
- work.pUserClipBuffer,
- psContext.vI.sample,
- psContext.vJ.sample);
- }
-
- simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
- simdscalar depthPassMask = vCoverageMask;
- simdscalar stencilPassMask = vCoverageMask;
-
- // Early-Z?
- if (T::bCanEarlyZ)
- {
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEarlyDepthTest, pDC->drawId);
- depthPassMask = DepthStencilTest(&state,
- work.triFlags.frontFacing,
- work.triFlags.viewportIndex,
- psContext.vZ,
- pDepthSample,
- vCoverageMask,
- pStencilSample,
- &stencilPassMask);
- AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask),
- _simd_movemask_ps(stencilPassMask),
- _simd_movemask_ps(vCoverageMask)));
- RDTSC_END(pDC->pContext->pBucketMgr, BEEarlyDepthTest, 0);
-
- // early-exit if no samples passed depth or earlyZ is forced on.
- if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
- {
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
- &state.depthStencilState,
- work.triFlags.frontFacing,
- psContext.vZ,
- pDepthSample,
- depthPassMask,
- vCoverageMask,
- pStencilSample,
- stencilPassMask);
-
- if (!_simd_movemask_ps(depthPassMask))
- {
- work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
- continue;
- }
- }
- }
-
- psContext.sampleIndex = sample;
- psContext.activeMask = _simd_castps_si(vCoverageMask);
-
- // execute pixel shader
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelShader, pDC->drawId);
- state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
- RDTSC_END(pDC->pContext->pBucketMgr, BEPixelShader, 0);
-
- // update stats
- UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
- AR_EVENT(PSStats((HANDLE)&psContext.stats));
-
- vCoverageMask = _simd_castsi_ps(psContext.activeMask);
-
- if (_simd_movemask_ps(vCoverageMask))
- {
- isTileDirty = true;
- }
-
- // late-Z
- if (!T::bCanEarlyZ)
- {
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BELateDepthTest, pDC->drawId);
- depthPassMask = DepthStencilTest(&state,
- work.triFlags.frontFacing,
- work.triFlags.viewportIndex,
- psContext.vZ,
- pDepthSample,
- vCoverageMask,
- pStencilSample,
- &stencilPassMask);
- AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask),
- _simd_movemask_ps(stencilPassMask),
- _simd_movemask_ps(vCoverageMask)));
- RDTSC_END(pDC->pContext->pBucketMgr, BELateDepthTest, 0);
-
- if (!_simd_movemask_ps(depthPassMask))
- {
- // need to call depth/stencil write for stencil write
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
- &state.depthStencilState,
- work.triFlags.frontFacing,
- psContext.vZ,
- pDepthSample,
- depthPassMask,
- vCoverageMask,
- pStencilSample,
- stencilPassMask);
-
- work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
- continue;
- }
- }
-
- uint32_t statMask = _simd_movemask_ps(depthPassMask);
- uint32_t statCount = _mm_popcnt_u32(statMask);
- UPDATE_STAT_BE(DepthPassCount, statCount);
-
- // output merger
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEOutputMerger, pDC->drawId);
-
- OutputMerger8x2(pDC,
- psContext,
- psContext.pColorBuffer,
- sample,
- &state.blendState,
- state.pfnBlendFunc,
- vCoverageMask,
- depthPassMask,
- state.psState.renderTargetMask,
- useAlternateOffset,
- workerId);
-
- // do final depth write after all pixel kills
- if (!state.psState.forceEarlyZ)
- {
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
- &state.depthStencilState,
- work.triFlags.frontFacing,
- psContext.vZ,
- pDepthSample,
- depthPassMask,
- vCoverageMask,
- pStencilSample,
- stencilPassMask);
- }
- RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0);
- }
- work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
- }
-
- Endtile:
- ATTR_UNUSED;
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEndTile, pDC->drawId);
-
- if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
- {
- work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
- }
-
- if (useAlternateOffset)
- {
- unsigned long rt;
- uint32_t rtMask = state.colorHottileEnable;
- while (_BitScanForward(&rt, rtMask))
- {
- rtMask &= ~(1 << rt);
- psContext.pColorBuffer[rt] +=
- (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
- }
- }
-
- pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
- pStencilBuffer +=
- (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
-
- RDTSC_END(pDC->pContext->pBucketMgr, BEEndTile, 0);
-
- psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
- psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
- }
-
- psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
- psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
- }
-
- if (isTileDirty)
- {
- SetRenderHotTilesDirty(pDC, renderBuffers);
- }
-
- RDTSC_END(pDC->pContext->pBucketMgr, BESampleRateBackend, 0);
-}
-
-// Recursive template used to auto-nest conditionals. Converts dynamic enum function
-// arguments to static template arguments.
-template <uint32_t... ArgsT>
-struct BEChooserSampleRate
-{
- // Last Arg Terminator
- static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg)
- {
- switch (tArg)
- {
- case SWR_BACKEND_MSAA_SAMPLE_RATE:
- return BackendSampleRate<SwrBackendTraits<ArgsT...>>;
- break;
- case SWR_BACKEND_SINGLE_SAMPLE:
- case SWR_BACKEND_MSAA_PIXEL_RATE:
- SWR_ASSERT(0 && "Invalid backend func\n");
- return nullptr;
- break;
- default:
- SWR_ASSERT(0 && "Invalid backend func\n");
- return nullptr;
- break;
- }
- }
-
- // Recursively parse args
- template <typename... TArgsT>
- static PFN_BACKEND_FUNC GetFunc(SWR_INPUT_COVERAGE tArg, TArgsT... remainingArgs)
- {
- switch (tArg)
- {
- case SWR_INPUT_COVERAGE_NONE:
- return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(
- remainingArgs...);
- break;
- case SWR_INPUT_COVERAGE_NORMAL:
- return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(
- remainingArgs...);
- break;
- case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE:
- return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(
- remainingArgs...);
- break;
- default:
- SWR_ASSERT(0 && "Invalid sample pattern\n");
- return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(
- remainingArgs...);
- break;
- }
- }
-
- // Recursively parse args
- template <typename... TArgsT>
- static PFN_BACKEND_FUNC GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs)
- {
- switch (tArg)
- {
- case SWR_MULTISAMPLE_1X:
- return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
- break;
- case SWR_MULTISAMPLE_2X:
- return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...);
- break;
- case SWR_MULTISAMPLE_4X:
- return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...);
- break;
- case SWR_MULTISAMPLE_8X:
- return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...);
- break;
- case SWR_MULTISAMPLE_16X:
- return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...);
- break;
- default:
- SWR_ASSERT(0 && "Invalid sample count\n");
- return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
- break;
- }
- }
-
- // Recursively parse args
- template <typename... TArgsT>
- static PFN_BACKEND_FUNC GetFunc(bool tArg, TArgsT... remainingArgs)
- {
- if (tArg == true)
- {
- return BEChooserSampleRate<ArgsT..., 1>::GetFunc(remainingArgs...);
- }
-
- return BEChooserSampleRate<ArgsT..., 0>::GetFunc(remainingArgs...);
- }
-};
-
-void InitBackendSampleFuncTable(
- PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2])
-{
- for (uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_COUNT;
- sampleCount++)
- {
- for (uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
- {
- for (uint32_t centroid = 0; centroid < 2; centroid++)
- {
- for (uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
- {
- table[sampleCount][inputCoverage][centroid][canEarlyZ] =
- BEChooserSampleRate<>::GetFunc(
- (SWR_MULTISAMPLE_COUNT)sampleCount,
- false,
- (SWR_INPUT_COVERAGE)inputCoverage,
- (centroid > 0),
- false,
- (canEarlyZ > 0),
- (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
- }
- }
- }
- }
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
deleted file mode 100644
index 06f78c4b88a..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
+++ /dev/null
@@ -1,428 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file backend.cpp
- *
- * @brief Backend handles rasterization, pixel shading and output merger
- * operations.
- *
- ******************************************************************************/
-
-#include <smmintrin.h>
-
-#include "backend.h"
-#include "backend_impl.h"
-#include "tilemgr.h"
-#include "memory/tilingtraits.h"
-#include "core/multisample.h"
-
-#include <algorithm>
-
-template <typename T>
-void BackendSingleSample(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t x,
- uint32_t y,
- SWR_TRIANGLE_DESC& work,
- RenderOutputBuffers& renderBuffers)
-{
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESingleSampleBackend, pDC->drawId);
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId);
-
- void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
-
- const API_STATE& state = GetApiState(pDC);
-
- BarycentricCoeffs coeffs;
- SetupBarycentricCoeffs(&coeffs, work);
-
- SWR_PS_CONTEXT psContext;
- const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
- SetupPixelShaderContext<T>(&psContext, samplePos, work);
-
- uint8_t *pDepthBuffer, *pStencilBuffer;
- SetupRenderBuffers(psContext.pColorBuffer,
- &pDepthBuffer,
- &pStencilBuffer,
- state.colorHottileEnable,
- renderBuffers);
-
- // Indicates backend rendered something to the color buffer
- bool isTileDirty = false;
-
- RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 1);
-
- psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
- psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
-
- const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
-
- for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
- {
- psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
- psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
-
- const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
-
- for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
- {
- const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
-
-
- simdmask coverageMask = work.coverageMask[0] & MASK;
-
- if (coverageMask)
- {
- if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
- {
- static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT,
- "Unsupported depth hot tile format");
-
- const simdscalar z =
- _simd_load_ps(reinterpret_cast<const float*>(pDepthBuffer));
-
- const float minz = state.depthBoundsState.depthBoundsTestMinValue;
- const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
-
- coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
- }
-
- if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
- {
- const uint64_t* pCoverageMask =
- (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
- ? &work.innerCoverageMask
- : &work.coverageMask[0];
-
- generateInputCoverage<T, T::InputCoverage>(
- pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
- }
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
-
- CalcPixelBarycentrics(coeffs, psContext);
-
- CalcCentroid<T, true>(
- &psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
-
- // interpolate and quantize z
- psContext.vZ = vplaneps(
- coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
- psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
-
- RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 1);
-
- // interpolate user clip distance if available
- if (state.backendState.clipDistanceMask)
- {
- coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask,
- work.pUserClipBuffer,
- psContext.vI.center,
- psContext.vJ.center);
- }
-
- simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
- simdscalar depthPassMask = vCoverageMask;
- simdscalar stencilPassMask = vCoverageMask;
-
- // Early-Z?
- if (T::bCanEarlyZ)
- {
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEarlyDepthTest, pDC->drawId);
- depthPassMask = DepthStencilTest(&state,
- work.triFlags.frontFacing,
- work.triFlags.viewportIndex,
- psContext.vZ,
- pDepthBuffer,
- vCoverageMask,
- pStencilBuffer,
- &stencilPassMask);
- AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask),
- _simd_movemask_ps(stencilPassMask),
- _simd_movemask_ps(vCoverageMask)));
- RDTSC_END(pDC->pContext->pBucketMgr, BEEarlyDepthTest, 0);
-
- // early-exit if no pixels passed depth or earlyZ is forced on
- if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
- {
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
- &state.depthStencilState,
- work.triFlags.frontFacing,
- psContext.vZ,
- pDepthBuffer,
- depthPassMask,
- vCoverageMask,
- pStencilBuffer,
- stencilPassMask);
-
- if (!_simd_movemask_ps(depthPassMask))
- {
- goto Endtile;
- }
- }
- }
-
- psContext.sampleIndex = 0;
- psContext.activeMask = _simd_castps_si(vCoverageMask);
-
- // execute pixel shader
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelShader, pDC->drawId);
- state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
- RDTSC_END(pDC->pContext->pBucketMgr, BEPixelShader, 0);
-
- // update stats
- UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
- AR_EVENT(PSStats((HANDLE)&psContext.stats));
-
- vCoverageMask = _simd_castsi_ps(psContext.activeMask);
-
- if (_simd_movemask_ps(vCoverageMask))
- {
- isTileDirty = true;
- }
-
- // late-Z
- if (!T::bCanEarlyZ)
- {
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BELateDepthTest, pDC->drawId);
- depthPassMask = DepthStencilTest(&state,
- work.triFlags.frontFacing,
- work.triFlags.viewportIndex,
- psContext.vZ,
- pDepthBuffer,
- vCoverageMask,
- pStencilBuffer,
- &stencilPassMask);
- AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask),
- _simd_movemask_ps(stencilPassMask),
- _simd_movemask_ps(vCoverageMask)));
- RDTSC_END(pDC->pContext->pBucketMgr, BELateDepthTest, 0);
-
- if (!_simd_movemask_ps(depthPassMask))
- {
- // need to call depth/stencil write for stencil write
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
- &state.depthStencilState,
- work.triFlags.frontFacing,
- psContext.vZ,
- pDepthBuffer,
- depthPassMask,
- vCoverageMask,
- pStencilBuffer,
- stencilPassMask);
- goto Endtile;
- }
- }
- else
- {
- // for early z, consolidate discards from shader
- // into depthPassMask
- depthPassMask = _simd_and_ps(depthPassMask, vCoverageMask);
- }
-
- uint32_t statMask = _simd_movemask_ps(depthPassMask);
- uint32_t statCount = _mm_popcnt_u32(statMask);
- UPDATE_STAT_BE(DepthPassCount, statCount);
-
- // output merger
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEOutputMerger, pDC->drawId);
-
- OutputMerger8x2(pDC,
- psContext,
- psContext.pColorBuffer,
- 0,
- &state.blendState,
- state.pfnBlendFunc,
- vCoverageMask,
- depthPassMask,
- state.psState.renderTargetMask,
- useAlternateOffset,
- workerId);
-
- // do final depth write after all pixel kills
- if (!state.psState.forceEarlyZ)
- {
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
- &state.depthStencilState,
- work.triFlags.frontFacing,
- psContext.vZ,
- pDepthBuffer,
- depthPassMask,
- vCoverageMask,
- pStencilBuffer,
- stencilPassMask);
- }
- RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0);
- }
-
- Endtile:
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEndTile, pDC->drawId);
-
- work.coverageMask[0] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
- if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
- {
- work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
- }
-
- if (useAlternateOffset)
- {
- unsigned long rt;
- uint32_t rtMask = state.colorHottileEnable;
- while (_BitScanForward(&rt, rtMask))
- {
- rtMask &= ~(1 << rt);
- psContext.pColorBuffer[rt] +=
- (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
- }
- }
-
- pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
- pStencilBuffer +=
- (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
-
- RDTSC_END(pDC->pContext->pBucketMgr, BEEndTile, 0);
-
- psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
- psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
- }
-
- psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
- psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
- }
-
- if (isTileDirty)
- {
- SetRenderHotTilesDirty(pDC, renderBuffers);
- }
-
- RDTSC_END(pDC->pContext->pBucketMgr, BESingleSampleBackend, 0);
-}
-
-// Recursive template used to auto-nest conditionals. Converts dynamic enum function
-// arguments to static template arguments.
-template <uint32_t... ArgsT>
-struct BEChooserSingleSample
-{
- // Last Arg Terminator
- static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg)
- {
- switch (tArg)
- {
- case SWR_BACKEND_SINGLE_SAMPLE:
- return BackendSingleSample<SwrBackendTraits<ArgsT...>>;
- break;
- case SWR_BACKEND_MSAA_PIXEL_RATE:
- case SWR_BACKEND_MSAA_SAMPLE_RATE:
- default:
- SWR_ASSERT(0 && "Invalid backend func\n");
- return nullptr;
- break;
- }
- }
-
- // Recursively parse args
- template <typename... TArgsT>
- static PFN_BACKEND_FUNC GetFunc(SWR_INPUT_COVERAGE tArg, TArgsT... remainingArgs)
- {
- switch (tArg)
- {
- case SWR_INPUT_COVERAGE_NONE:
- return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(
- remainingArgs...);
- break;
- case SWR_INPUT_COVERAGE_NORMAL:
- return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(
- remainingArgs...);
- break;
- case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE:
- return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(
- remainingArgs...);
- break;
- default:
- SWR_ASSERT(0 && "Invalid sample pattern\n");
- return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(
- remainingArgs...);
- break;
- }
- }
-
- // Recursively parse args
- template <typename... TArgsT>
- static PFN_BACKEND_FUNC GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs)
- {
- switch (tArg)
- {
- case SWR_MULTISAMPLE_1X:
- return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
- break;
- case SWR_MULTISAMPLE_2X:
- return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...);
- break;
- case SWR_MULTISAMPLE_4X:
- return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...);
- break;
- case SWR_MULTISAMPLE_8X:
- return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...);
- break;
- case SWR_MULTISAMPLE_16X:
- return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...);
- break;
- default:
- SWR_ASSERT(0 && "Invalid sample count\n");
- return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
- break;
- }
- }
-
- // Recursively parse args
- template <typename... TArgsT>
- static PFN_BACKEND_FUNC GetFunc(bool tArg, TArgsT... remainingArgs)
- {
- if (tArg == true)
- {
- return BEChooserSingleSample<ArgsT..., 1>::GetFunc(remainingArgs...);
- }
-
- return BEChooserSingleSample<ArgsT..., 0>::GetFunc(remainingArgs...);
- }
-};
-
-void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COUNT][2][2])
-{
- for (uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
- {
- for (uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
- {
- for (uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
- {
- table[inputCoverage][isCentroid][canEarlyZ] =
- BEChooserSingleSample<>::GetFunc(SWR_MULTISAMPLE_1X,
- false,
- (SWR_INPUT_COVERAGE)inputCoverage,
- (isCentroid > 0),
- false,
- (canEarlyZ > 0),
- SWR_BACKEND_SINGLE_SAMPLE);
- }
- }
- }
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/backends/meson.build b/src/gallium/drivers/swr/rasterizer/core/backends/meson.build
deleted file mode 100644
index d64715dc8be..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/backends/meson.build
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright © 2017-2018 Intel Corporation
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-
-files_swr_common += custom_target(
- 'gen_backend_pixel',
- input : swr_gen_backends_py,
- output : [
- 'gen_BackendPixelRate0.cpp', 'gen_BackendPixelRate1.cpp',
- 'gen_BackendPixelRate2.cpp', 'gen_BackendPixelRate3.cpp',
- 'gen_BackendPixelRate.hpp',
- ],
- command : [
- prog_python, '@INPUT@',
- '--outdir', '@OUTDIR@',
- '--dim', '5', '2', '3', '2', '2', '2',
- '--numfiles', '4',
- '--cpp', '--hpp',
- ],
- depend_files : [ swr_gen_backend_files, swr_gen_header_init_files ],
-)
-
-files_swr_common += custom_target(
- 'gen_backend_raster',
- input : swr_gen_backends_py,
- output : [
- 'gen_rasterizer0.cpp', 'gen_rasterizer1.cpp',
- 'gen_rasterizer2.cpp', 'gen_rasterizer3.cpp',
- 'gen_rasterizer.hpp',
- ],
- command : [
- prog_python, '@INPUT@',
- '--outdir', '@OUTDIR@',
- '--rast',
- '--dim', '5', '2', '2', '3', '5', '2',
- '--numfiles', '4',
- '--cpp', '--hpp',
- ],
- depend_files : [ swr_gen_rasterizer_files, swr_gen_header_init_files ],
-)
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
deleted file mode 100644
index 36732289d76..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp
+++ /dev/null
@@ -1,1976 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file binner.cpp
- *
- * @brief Implementation for the macrotile binner
- *
- ******************************************************************************/
-
-#include "binner.h"
-#include "context.h"
-#include "frontend.h"
-#include "conservativeRast.h"
-#include "pa.h"
-#include "rasterizer.h"
-#include "rdtsc_core.h"
-#include "tilemgr.h"
-
-// Function Prototype
-template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPostSetupLinesImpl(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- Vec4<SIMD_T> prim[],
- Float<SIMD_T> recipW[],
- uint32_t primMask,
- Integer<SIMD_T> const& primID,
- Integer<SIMD_T> const& viewportIdx,
- Integer<SIMD_T> const& rtIdx);
-
-template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPostSetupPointsImpl(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- Vec4<SIMD_T> prim[],
- uint32_t primMask,
- Integer<SIMD_T> const& primID,
- Integer<SIMD_T> const& viewportIdx,
- Integer<SIMD_T> const& rtIdx);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Processes attributes for the backend based on linkage mask and
-/// linkage map. Essentially just doing an SOA->AOS conversion and pack.
-/// @param pDC - Draw context
-/// @param pa - Primitive Assembly state
-/// @param linkageMask - Specifies which VS outputs are routed to PS.
-/// @param pLinkageMap - maps VS attribute slot to PS slot
-/// @param triIndex - Triangle to process attributes for
-/// @param pBuffer - Output result
-template <typename NumVertsT,
- typename IsSwizzledT,
- typename HasConstantInterpT,
- typename IsDegenerate>
-INLINE void ProcessAttributes(
- DRAW_CONTEXT* pDC, PA_STATE& pa, uint32_t triIndex, uint32_t primId, float* pBuffer)
-{
- static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT");
- const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
- // Conservative Rasterization requires degenerate tris to have constant attribute interpolation
- uint32_t constantInterpMask =
- IsDegenerate::value ? 0xFFFFFFFF : backendState.constantInterpolationMask;
- const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex;
- const PRIMITIVE_TOPOLOGY topo = pa.binTopology;
-
- static const float constTable[3][4] = {
- {0.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 1.0f, 1.0f, 1.0f}};
-
- for (uint32_t i = 0; i < backendState.numAttributes; ++i)
- {
- uint32_t inputSlot;
- if (IsSwizzledT::value)
- {
- SWR_ATTRIB_SWIZZLE attribSwizzle = backendState.swizzleMap[i];
- inputSlot = backendState.vertexAttribOffset + attribSwizzle.sourceAttrib;
- }
- else
- {
- inputSlot = backendState.vertexAttribOffset + i;
- }
-
- simd4scalar attrib[3]; // triangle attribs (always 4 wide)
- float* pAttribStart = pBuffer;
-
- if (HasConstantInterpT::value || IsDegenerate::value)
- {
- if (CheckBit(constantInterpMask, i))
- {
- uint32_t vid;
- uint32_t adjustedTriIndex;
- static const uint32_t tristripProvokingVertex[] = {0, 2, 1};
- static const int32_t quadProvokingTri[2][4] = {{0, 0, 0, 1}, {0, -1, 0, 0}};
- static const uint32_t quadProvokingVertex[2][4] = {{0, 1, 2, 2}, {0, 1, 1, 2}};
- static const int32_t qstripProvokingTri[2][4] = {{0, 0, 0, 1}, {-1, 0, 0, 0}};
- static const uint32_t qstripProvokingVertex[2][4] = {{0, 1, 2, 1}, {0, 0, 2, 1}};
-
- switch (topo)
- {
- case TOP_QUAD_LIST:
- adjustedTriIndex = triIndex + quadProvokingTri[triIndex & 1][provokingVertex];
- vid = quadProvokingVertex[triIndex & 1][provokingVertex];
- break;
- case TOP_QUAD_STRIP:
- adjustedTriIndex = triIndex + qstripProvokingTri[triIndex & 1][provokingVertex];
- vid = qstripProvokingVertex[triIndex & 1][provokingVertex];
- break;
- case TOP_TRIANGLE_STRIP:
- adjustedTriIndex = triIndex;
- vid =
- (triIndex & 1) ? tristripProvokingVertex[provokingVertex] : provokingVertex;
- break;
- default:
- adjustedTriIndex = triIndex;
- vid = provokingVertex;
- break;
- }
-
- pa.AssembleSingle(inputSlot, adjustedTriIndex, attrib);
-
- for (uint32_t i = 0; i < NumVertsT::value; ++i)
- {
- SIMD128::store_ps(pBuffer, attrib[vid]);
- pBuffer += 4;
- }
- }
- else
- {
- pa.AssembleSingle(inputSlot, triIndex, attrib);
-
- for (uint32_t i = 0; i < NumVertsT::value; ++i)
- {
- SIMD128::store_ps(pBuffer, attrib[i]);
- pBuffer += 4;
- }
- }
- }
- else
- {
- pa.AssembleSingle(inputSlot, triIndex, attrib);
-
- for (uint32_t i = 0; i < NumVertsT::value; ++i)
- {
- SIMD128::store_ps(pBuffer, attrib[i]);
- pBuffer += 4;
- }
- }
-
- // pad out the attrib buffer to 3 verts to ensure the triangle
- // interpolation code in the pixel shader works correctly for the
- // 3 topologies - point, line, tri. This effectively zeros out the
- // effect of the missing vertices in the triangle interpolation.
- for (uint32_t v = NumVertsT::value; v < 3; ++v)
- {
- SIMD128::store_ps(pBuffer, attrib[NumVertsT::value - 1]);
- pBuffer += 4;
- }
-
- // check for constant source overrides
- if (IsSwizzledT::value)
- {
- uint32_t mask = backendState.swizzleMap[i].componentOverrideMask;
- if (mask)
- {
- unsigned long comp;
- while (_BitScanForward(&comp, mask))
- {
- mask &= ~(1 << comp);
-
- float constantValue = 0.0f;
- switch ((SWR_CONSTANT_SOURCE)backendState.swizzleMap[i].constantSource)
- {
- case SWR_CONSTANT_SOURCE_CONST_0000:
- case SWR_CONSTANT_SOURCE_CONST_0001_FLOAT:
- case SWR_CONSTANT_SOURCE_CONST_1111_FLOAT:
- constantValue = constTable[backendState.swizzleMap[i].constantSource][comp];
- break;
- case SWR_CONSTANT_SOURCE_PRIM_ID:
- constantValue = *(float*)&primId;
- break;
- }
-
- // apply constant value to all 3 vertices
- for (uint32_t v = 0; v < 3; ++v)
- {
- pAttribStart[comp + v * 4] = constantValue;
- }
- }
- }
- }
- }
-}
-
-typedef void (*PFN_PROCESS_ATTRIBUTES)(DRAW_CONTEXT*, PA_STATE&, uint32_t, uint32_t, float*);
-
-struct ProcessAttributesChooser
-{
- typedef PFN_PROCESS_ATTRIBUTES FuncType;
-
- template <typename... ArgsB>
- static FuncType GetFunc()
- {
- return ProcessAttributes<ArgsB...>;
- }
-};
-
-PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts,
- bool IsSwizzled,
- bool HasConstantInterp,
- bool IsDegenerate = false)
-{
- return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(
- IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp, IsDegenerate);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Processes enabled user clip distances. Loads the active clip
-/// distances from the PA, sets up barycentric equations, and
-/// stores the results to the output buffer
-/// @param pa - Primitive Assembly state
-/// @param primIndex - primitive index to process
-/// @param clipDistMask - mask of enabled clip distances
-/// @param pUserClipBuffer - buffer to store results
-template <uint32_t NumVerts>
-void ProcessUserClipDist(const SWR_BACKEND_STATE& state,
- PA_STATE& pa,
- uint32_t primIndex,
- float* pRecipW,
- float* pUserClipBuffer)
-{
- unsigned long clipDist;
- uint32_t clipDistMask = state.clipDistanceMask;
- while (_BitScanForward(&clipDist, clipDistMask))
- {
- clipDistMask &= ~(1 << clipDist);
- uint32_t clipSlot = clipDist >> 2;
- uint32_t clipComp = clipDist & 0x3;
- uint32_t clipAttribSlot =
- clipSlot == 0 ? state.vertexClipCullOffset : state.vertexClipCullOffset + 1;
-
- simd4scalar primClipDist[3];
- pa.AssembleSingle(clipAttribSlot, primIndex, primClipDist);
-
- float vertClipDist[NumVerts];
- for (uint32_t e = 0; e < NumVerts; ++e)
- {
- OSALIGNSIMD(float) aVertClipDist[4];
- SIMD128::store_ps(aVertClipDist, primClipDist[e]);
- vertClipDist[e] = aVertClipDist[clipComp];
- };
-
- // setup plane equations for barycentric interpolation in the backend
- float baryCoeff[NumVerts];
- float last = vertClipDist[NumVerts - 1] * pRecipW[NumVerts - 1];
- for (uint32_t e = 0; e < NumVerts - 1; ++e)
- {
- baryCoeff[e] = vertClipDist[e] * pRecipW[e] - last;
- }
- baryCoeff[NumVerts - 1] = last;
-
- for (uint32_t e = 0; e < NumVerts; ++e)
- {
- *(pUserClipBuffer++) = baryCoeff[e];
- }
- }
-}
-
-INLINE
-void TransposeVertices(simd4scalar (&dst)[8],
- const simdscalar& src0,
- const simdscalar& src1,
- const simdscalar& src2)
-{
- vTranspose3x8(dst, src0, src1, src2);
-}
-
-INLINE
-void TransposeVertices(simd4scalar (&dst)[16],
- const simd16scalar& src0,
- const simd16scalar& src1,
- const simd16scalar& src2)
-{
- vTranspose4x16(
- reinterpret_cast<simd16scalar(&)[4]>(dst), src0, src1, src2, _simd16_setzero_ps());
-}
-
-#if KNOB_ENABLE_EARLY_RAST
-
-#define ER_SIMD_TILE_X_DIM (1 << ER_SIMD_TILE_X_SHIFT)
-#define ER_SIMD_TILE_Y_DIM (1 << ER_SIMD_TILE_Y_SHIFT)
-
-template <typename SIMD_T>
-struct EarlyRastHelper
-{
-};
-
-template <>
-struct EarlyRastHelper<SIMD256>
-{
- static SIMD256::Integer InitShiftCntrl()
- {
- return SIMD256::set_epi32(24, 25, 26, 27, 28, 29, 30, 31);
- }
-};
-
-#if USE_SIMD16_FRONTEND
-template <>
-struct EarlyRastHelper<SIMD512>
-{
- static SIMD512::Integer InitShiftCntrl()
- {
- return SIMD512::set_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
- }
-};
-
-#endif
-//////////////////////////////////////////////////////////////////////////
-/// @brief Early Rasterizer (ER); triangles that fit small (e.g. 4x4) tile
-/// (ER tile) can be rasterized as early as in binner to check if
-/// they cover any pixels. If not - the triangles can be
-/// culled in binner.
-/// @param pDC - draw context; used in this function only for the RDTSC_EVENT bucket manager
-/// @param er_bbox - coordinates of ER tile for each triangle
-/// @param vAi - A coefficients of triangle edges (overwritten: negated for lanes set in cwTrisMask)
-/// @param vBi - B coefficients of triangle edges (overwritten: negated for lanes set in cwTrisMask)
-/// @param vXi - X coordinates of triangle vertices
-/// @param vYi - Y coordinates of triangle vertices
-/// @param cwTrisMask - mask indicating CW triangles; one bit per SIMD lane
-/// @param triMask - mask for valid SIMD lanes (triangles)
-/// @param oneTileMask - defines triangles for ER to work on
-/// (tris that fit into ER tile)
-template <typename SIMD_T, uint32_t SIMD_WIDTH, typename CT>
-uint32_t SIMDCALL EarlyRasterizer(DRAW_CONTEXT* pDC, // returns triMask with the oneTileMask lanes that lit no pixel cleared
- SIMDBBOX_T<SIMD_T>& er_bbox,
- Integer<SIMD_T> (&vAi)[3],
- Integer<SIMD_T> (&vBi)[3],
- Integer<SIMD_T> (&vXi)[3],
- Integer<SIMD_T> (&vYi)[3],
- uint32_t cwTrisMask,
- uint32_t triMask,
- uint32_t oneTileMask)
-{
- // step to pixel center of top-left pixel of the triangle bbox
- Integer<SIMD_T> vTopLeftX =
- SIMD_T::template slli_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(er_bbox.xmin);
- vTopLeftX = SIMD_T::add_epi32(vTopLeftX, SIMD_T::set1_epi32(FIXED_POINT_SCALE / 2));
-
- Integer<SIMD_T> vTopLeftY =
- SIMD_T::template slli_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(er_bbox.ymin);
- vTopLeftY = SIMD_T::add_epi32(vTopLeftY, SIMD_T::set1_epi32(FIXED_POINT_SCALE / 2));
-
- // negate A and B for CW tris (negated copies are computed for every lane; the blend below picks them per lane)
- Integer<SIMD_T> vNegA0 = SIMD_T::mullo_epi32(vAi[0], SIMD_T::set1_epi32(-1));
- Integer<SIMD_T> vNegA1 = SIMD_T::mullo_epi32(vAi[1], SIMD_T::set1_epi32(-1));
- Integer<SIMD_T> vNegA2 = SIMD_T::mullo_epi32(vAi[2], SIMD_T::set1_epi32(-1));
- Integer<SIMD_T> vNegB0 = SIMD_T::mullo_epi32(vBi[0], SIMD_T::set1_epi32(-1));
- Integer<SIMD_T> vNegB1 = SIMD_T::mullo_epi32(vBi[1], SIMD_T::set1_epi32(-1));
- Integer<SIMD_T> vNegB2 = SIMD_T::mullo_epi32(vBi[2], SIMD_T::set1_epi32(-1));
-
- RDTSC_EVENT(pDC->pContext->pBucketMgr,
- FEEarlyRastEnter,
- _mm_popcnt_u32(oneTileMask & triMask),
- 0);
-
- Integer<SIMD_T> vShiftCntrl = EarlyRastHelper<SIMD_T>::InitShiftCntrl();
- Integer<SIMD_T> vCwTris = SIMD_T::set1_epi32(cwTrisMask);
- Integer<SIMD_T> vMask = SIMD_T::sllv_epi32(vCwTris, vShiftCntrl); // broadcast mask shifted per lane; used as the blendv_ps selector below
-
- vAi[0] = SIMD_T::castps_si(SIMD_T::blendv_ps(
- SIMD_T::castsi_ps(vAi[0]), SIMD_T::castsi_ps(vNegA0), SIMD_T::castsi_ps(vMask)));
- vAi[1] = SIMD_T::castps_si(SIMD_T::blendv_ps(
- SIMD_T::castsi_ps(vAi[1]), SIMD_T::castsi_ps(vNegA1), SIMD_T::castsi_ps(vMask)));
- vAi[2] = SIMD_T::castps_si(SIMD_T::blendv_ps(
- SIMD_T::castsi_ps(vAi[2]), SIMD_T::castsi_ps(vNegA2), SIMD_T::castsi_ps(vMask)));
- vBi[0] = SIMD_T::castps_si(SIMD_T::blendv_ps(
- SIMD_T::castsi_ps(vBi[0]), SIMD_T::castsi_ps(vNegB0), SIMD_T::castsi_ps(vMask)));
- vBi[1] = SIMD_T::castps_si(SIMD_T::blendv_ps(
- SIMD_T::castsi_ps(vBi[1]), SIMD_T::castsi_ps(vNegB1), SIMD_T::castsi_ps(vMask)));
- vBi[2] = SIMD_T::castps_si(SIMD_T::blendv_ps(
- SIMD_T::castsi_ps(vBi[2]), SIMD_T::castsi_ps(vNegB2), SIMD_T::castsi_ps(vMask)));
-
- // evaluate edge equations at top-left pixel: edge = A * (x - Xi) + B * (y - Yi)
- Integer<SIMD_T> vDeltaX0 = SIMD_T::sub_epi32(vTopLeftX, vXi[0]);
- Integer<SIMD_T> vDeltaX1 = SIMD_T::sub_epi32(vTopLeftX, vXi[1]);
- Integer<SIMD_T> vDeltaX2 = SIMD_T::sub_epi32(vTopLeftX, vXi[2]);
-
- Integer<SIMD_T> vDeltaY0 = SIMD_T::sub_epi32(vTopLeftY, vYi[0]);
- Integer<SIMD_T> vDeltaY1 = SIMD_T::sub_epi32(vTopLeftY, vYi[1]);
- Integer<SIMD_T> vDeltaY2 = SIMD_T::sub_epi32(vTopLeftY, vYi[2]);
-
- Integer<SIMD_T> vAX0 = SIMD_T::mullo_epi32(vAi[0], vDeltaX0);
- Integer<SIMD_T> vAX1 = SIMD_T::mullo_epi32(vAi[1], vDeltaX1);
- Integer<SIMD_T> vAX2 = SIMD_T::mullo_epi32(vAi[2], vDeltaX2);
-
- Integer<SIMD_T> vBY0 = SIMD_T::mullo_epi32(vBi[0], vDeltaY0);
- Integer<SIMD_T> vBY1 = SIMD_T::mullo_epi32(vBi[1], vDeltaY1);
- Integer<SIMD_T> vBY2 = SIMD_T::mullo_epi32(vBi[2], vDeltaY2);
-
- Integer<SIMD_T> vEdge0 = SIMD_T::add_epi32(vAX0, vBY0);
- Integer<SIMD_T> vEdge1 = SIMD_T::add_epi32(vAX1, vBY1);
- Integer<SIMD_T> vEdge2 = SIMD_T::add_epi32(vAX2, vBY2);
-
- vEdge0 = SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vEdge0);
- vEdge1 = SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vEdge1);
- vEdge2 = SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vEdge2);
-
- // top left rule: precompute (edge - 1); it replaces the edge value in the two cases handled below
- Integer<SIMD_T> vEdgeAdjust0 = SIMD_T::sub_epi32(vEdge0, SIMD_T::set1_epi32(1));
- Integer<SIMD_T> vEdgeAdjust1 = SIMD_T::sub_epi32(vEdge1, SIMD_T::set1_epi32(1));
- Integer<SIMD_T> vEdgeAdjust2 = SIMD_T::sub_epi32(vEdge2, SIMD_T::set1_epi32(1));
-
- // vA < 0 (vAi itself is passed as the blend selector)
- vEdge0 = SIMD_T::castps_si(SIMD_T::blendv_ps(
- SIMD_T::castsi_ps(vEdge0), SIMD_T::castsi_ps(vEdgeAdjust0), SIMD_T::castsi_ps(vAi[0])));
- vEdge1 = SIMD_T::castps_si(SIMD_T::blendv_ps(
- SIMD_T::castsi_ps(vEdge1), SIMD_T::castsi_ps(vEdgeAdjust1), SIMD_T::castsi_ps(vAi[1])));
- vEdge2 = SIMD_T::castps_si(SIMD_T::blendv_ps(
- SIMD_T::castsi_ps(vEdge2), SIMD_T::castsi_ps(vEdgeAdjust2), SIMD_T::castsi_ps(vAi[2])));
-
- // vA == 0 && vB < 0 (cmpeq result ANDed with vBi is used as the blend selector)
- Integer<SIMD_T> vCmp0 = SIMD_T::cmpeq_epi32(vAi[0], SIMD_T::setzero_si());
- Integer<SIMD_T> vCmp1 = SIMD_T::cmpeq_epi32(vAi[1], SIMD_T::setzero_si());
- Integer<SIMD_T> vCmp2 = SIMD_T::cmpeq_epi32(vAi[2], SIMD_T::setzero_si());
-
- vCmp0 = SIMD_T::and_si(vCmp0, vBi[0]);
- vCmp1 = SIMD_T::and_si(vCmp1, vBi[1]);
- vCmp2 = SIMD_T::and_si(vCmp2, vBi[2]);
-
- vEdge0 = SIMD_T::castps_si(SIMD_T::blendv_ps(
- SIMD_T::castsi_ps(vEdge0), SIMD_T::castsi_ps(vEdgeAdjust0), SIMD_T::castsi_ps(vCmp0)));
- vEdge1 = SIMD_T::castps_si(SIMD_T::blendv_ps(
- SIMD_T::castsi_ps(vEdge1), SIMD_T::castsi_ps(vEdgeAdjust1), SIMD_T::castsi_ps(vCmp1)));
- vEdge2 = SIMD_T::castps_si(SIMD_T::blendv_ps(
- SIMD_T::castsi_ps(vEdge2), SIMD_T::castsi_ps(vEdgeAdjust2), SIMD_T::castsi_ps(vCmp2)));
-
-#if ER_SIMD_TILE_X_DIM == 4 && ER_SIMD_TILE_Y_DIM == 4
- // Go down (unrolled 4x4 path: columns are walked alternately down and up, adding/subtracting vBi per row and adding vAi per column)
- // coverage pixel 0 (the three edge values are ANDed; the combined value is tested via movemask_ps at the end)
- Integer<SIMD_T> vMask0 = SIMD_T::and_si(vEdge0, vEdge1);
- vMask0 = SIMD_T::and_si(vMask0, vEdge2);
-
- // coverage pixel 1
- Integer<SIMD_T> vEdge0N = SIMD_T::add_epi32(vEdge0, vBi[0]);
- Integer<SIMD_T> vEdge1N = SIMD_T::add_epi32(vEdge1, vBi[1]);
- Integer<SIMD_T> vEdge2N = SIMD_T::add_epi32(vEdge2, vBi[2]);
- Integer<SIMD_T> vMask1 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask1 = SIMD_T::and_si(vMask1, vEdge2N);
-
- // coverage pixel 2
- vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
- Integer<SIMD_T> vMask2 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask2 = SIMD_T::and_si(vMask2, vEdge2N);
-
- // coverage pixel 3
- vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
- Integer<SIMD_T> vMask3 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask3 = SIMD_T::and_si(vMask3, vEdge2N);
-
- // One step to the right and then up
-
- // coverage pixel 4
- vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]);
- vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
- vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
- Integer<SIMD_T> vMask4 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask4 = SIMD_T::and_si(vMask4, vEdge2N);
-
- // coverage pixel 5
- vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
- Integer<SIMD_T> vMask5 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask5 = SIMD_T::and_si(vMask5, vEdge2N);
-
- // coverage pixel 6
- vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
- Integer<SIMD_T> vMask6 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask6 = SIMD_T::and_si(vMask6, vEdge2N);
-
- // coverage pixel 7
- vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
- Integer<SIMD_T> vMask7 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask7 = SIMD_T::and_si(vMask7, vEdge2N);
-
- Integer<SIMD_T> vLit1 = SIMD_T::or_si(vMask0, vMask1); // OR of the first eight per-pixel masks (first two columns)
- vLit1 = SIMD_T::or_si(vLit1, vMask2);
- vLit1 = SIMD_T::or_si(vLit1, vMask3);
- vLit1 = SIMD_T::or_si(vLit1, vMask4);
- vLit1 = SIMD_T::or_si(vLit1, vMask5);
- vLit1 = SIMD_T::or_si(vLit1, vMask6);
- vLit1 = SIMD_T::or_si(vLit1, vMask7);
-
- // Step to the right and go down again (vMask0..vMask7 are reused for the remaining eight pixels)
-
- // coverage pixel 0
- vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]);
- vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
- vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
- vMask0 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask0 = SIMD_T::and_si(vMask0, vEdge2N);
-
- // coverage pixel 1
- vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
- vMask1 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask1 = SIMD_T::and_si(vMask1, vEdge2N);
-
- // coverage pixel 2
- vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
- vMask2 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask2 = SIMD_T::and_si(vMask2, vEdge2N);
-
- // coverage pixel 3
- vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
- vMask3 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask3 = SIMD_T::and_si(vMask3, vEdge2N);
-
- // And for the last time - to the right and up
-
- // coverage pixel 4
- vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]);
- vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
- vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
- vMask4 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask4 = SIMD_T::and_si(vMask4, vEdge2N);
-
- // coverage pixel 5
- vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
- vMask5 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask5 = SIMD_T::and_si(vMask5, vEdge2N);
-
- // coverage pixel 6
- vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
- vMask6 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask6 = SIMD_T::and_si(vMask6, vEdge2N);
-
- // coverage pixel 7
- vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
- vMask7 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask7 = SIMD_T::and_si(vMask7, vEdge2N);
-
- Integer<SIMD_T> vLit2 = SIMD_T::or_si(vMask0, vMask1); // OR of the second eight per-pixel masks (last two columns)
- vLit2 = SIMD_T::or_si(vLit2, vMask2);
- vLit2 = SIMD_T::or_si(vLit2, vMask3);
- vLit2 = SIMD_T::or_si(vLit2, vMask4);
- vLit2 = SIMD_T::or_si(vLit2, vMask5);
- vLit2 = SIMD_T::or_si(vLit2, vMask6);
- vLit2 = SIMD_T::or_si(vLit2, vMask7);
-
- Integer<SIMD_T> vLit = SIMD_T::or_si(vLit1, vLit2);
-
-#else
- // Generic algorithm sweeping in row by row order (used for any ER tile size other than 4x4)
- Integer<SIMD_T> vRowMask[ER_SIMD_TILE_Y_DIM];
-
- Integer<SIMD_T> vEdge0N = vEdge0;
- Integer<SIMD_T> vEdge1N = vEdge1;
- Integer<SIMD_T> vEdge2N = vEdge2;
-
- for (uint32_t row = 0; row < ER_SIMD_TILE_Y_DIM; row++)
- {
- // Store edge values at the beginning of the row
- Integer<SIMD_T> vRowEdge0 = vEdge0N;
- Integer<SIMD_T> vRowEdge1 = vEdge1N;
- Integer<SIMD_T> vRowEdge2 = vEdge2N;
-
- Integer<SIMD_T> vColMask[ER_SIMD_TILE_X_DIM];
-
- for (uint32_t col = 0; col < ER_SIMD_TILE_X_DIM; col++)
- {
- vColMask[col] = SIMD_T::and_si(vEdge0N, vEdge1N);
- vColMask[col] = SIMD_T::and_si(vColMask[col], vEdge2N);
-
- vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]); // step one pixel along the row
- vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
- vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
- }
- vRowMask[row] = vColMask[0];
- for (uint32_t col = 1; col < ER_SIMD_TILE_X_DIM; col++)
- {
- vRowMask[row] = SIMD_T::or_si(vRowMask[row], vColMask[col]);
- }
- // Restore values and go to the next row
- vEdge0N = vRowEdge0;
- vEdge1N = vRowEdge1;
- vEdge2N = vRowEdge2;
-
- vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
- }
-
- // compress all masks
- Integer<SIMD_T> vLit = vRowMask[0];
- for (uint32_t row = 1; row < ER_SIMD_TILE_Y_DIM; row++)
- {
- vLit = SIMD_T::or_si(vLit, vRowMask[row]);
- }
-
-#endif
- // Check which triangles have any pixel lit
- uint32_t maskLit = SIMD_T::movemask_ps(SIMD_T::castsi_ps(vLit));
- uint32_t maskUnlit = ~maskLit & oneTileMask; // only lanes selected by oneTileMask are candidates for culling
-
- uint32_t oldTriMask = triMask;
- triMask &= ~maskUnlit;
-
- if (triMask ^ oldTriMask) // emit the exit event only when at least one triangle was culled here
- {
- RDTSC_EVENT(pDC->pContext->pBucketMgr,
- FEEarlyRastExit,
- _mm_popcnt_u32(triMask & oneTileMask),
- 0);
- }
- return triMask;
-}
-
-#endif // Early rasterizer
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Bin triangle primitives to macro tiles. Performs setup, clipping
-/// culling, viewport transform, etc.
-/// @param pDC - pointer to draw context.
-/// @param pa - The primitive assembly object.
-/// @param workerId - thread's worker id. Every thread has a unique id.
-/// @param tri - Contains triangle position data for SIMDs worth of triangles. Modified in place. @param triMask - one bit per SIMD lane; set bits mark triangles still to be binned.
-/// @param primID - Primitive ID for each triangle.
-/// @param viewportIdx - viewport array index for each triangle. @param rtIdx - render target array index for each triangle.
-/// @tparam CT - ConservativeRastFETraits
-template <typename SIMD_T, uint32_t SIMD_WIDTH, typename CT>
-void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- Vec4<SIMD_T> tri[3],
- uint32_t triMask,
- Integer<SIMD_T> const& primID,
- Integer<SIMD_T> const& viewportIdx,
- Integer<SIMD_T> const& rtIdx)
-{
- const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx); // scalar view of rtIdx, indexed by lane in the binning loop
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEBinTriangles, pDC->drawId);
-
- const API_STATE& state = GetApiState(pDC);
- const SWR_RASTSTATE& rastState = state.rastState;
- const SWR_FRONTEND_STATE& feState = state.frontendState;
-
- MacroTileMgr* pTileMgr = pDC->pTileMgr;
-
- Float<SIMD_T> vRecipW0 = SIMD_T::set1_ps(1.0f);
- Float<SIMD_T> vRecipW1 = SIMD_T::set1_ps(1.0f);
- Float<SIMD_T> vRecipW2 = SIMD_T::set1_ps(1.0f);
-
- if (feState.vpTransformDisable)
- {
- // RHW is passed in directly when VP transform is disabled
- vRecipW0 = tri[0].v[3];
- vRecipW1 = tri[1].v[3];
- vRecipW2 = tri[2].v[3];
- }
- else
- {
- // Perspective divide
- vRecipW0 = SIMD_T::div_ps(SIMD_T::set1_ps(1.0f), tri[0].w);
- vRecipW1 = SIMD_T::div_ps(SIMD_T::set1_ps(1.0f), tri[1].w);
- vRecipW2 = SIMD_T::div_ps(SIMD_T::set1_ps(1.0f), tri[2].w);
-
- tri[0].v[0] = SIMD_T::mul_ps(tri[0].v[0], vRecipW0);
- tri[1].v[0] = SIMD_T::mul_ps(tri[1].v[0], vRecipW1);
- tri[2].v[0] = SIMD_T::mul_ps(tri[2].v[0], vRecipW2);
-
- tri[0].v[1] = SIMD_T::mul_ps(tri[0].v[1], vRecipW0);
- tri[1].v[1] = SIMD_T::mul_ps(tri[1].v[1], vRecipW1);
- tri[2].v[1] = SIMD_T::mul_ps(tri[2].v[1], vRecipW2);
-
- tri[0].v[2] = SIMD_T::mul_ps(tri[0].v[2], vRecipW0);
- tri[1].v[2] = SIMD_T::mul_ps(tri[1].v[2], vRecipW1);
- tri[2].v[2] = SIMD_T::mul_ps(tri[2].v[2], vRecipW2);
-
- // Viewport transform to screen space coords
- if (pa.viewportArrayActive)
- {
- viewportTransform<3>(tri, state.vpMatrices, viewportIdx);
- }
- else
- {
- viewportTransform<3>(tri, state.vpMatrices);
- }
- }
-
- // Adjust for pixel center location
- Float<SIMD_T> offset = SwrPixelOffsets<SIMD_T>::GetOffset(rastState.pixelLocation);
-
- tri[0].x = SIMD_T::add_ps(tri[0].x, offset);
- tri[0].y = SIMD_T::add_ps(tri[0].y, offset);
-
- tri[1].x = SIMD_T::add_ps(tri[1].x, offset);
- tri[1].y = SIMD_T::add_ps(tri[1].y, offset);
-
- tri[2].x = SIMD_T::add_ps(tri[2].x, offset);
- tri[2].y = SIMD_T::add_ps(tri[2].y, offset);
-
- // Set vXi, vYi to required fixed point precision
- Integer<SIMD_T> vXi[3], vYi[3];
- FPToFixedPoint<SIMD_T>(tri, vXi, vYi);
-
- // triangle setup
- Integer<SIMD_T> vAi[3], vBi[3];
- triangleSetupABIntVertical(vXi, vYi, vAi, vBi);
-
- // determinant (tested below with 64-bit compares; vDet[0] and vDet[1] each cover half of the lanes)
- Integer<SIMD_T> vDet[2];
- calcDeterminantIntVertical(vAi, vBi, vDet);
-
- // cull zero area
- uint32_t maskLo =
- SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpeq_epi64(vDet[0], SIMD_T::setzero_si())));
- uint32_t maskHi =
- SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpeq_epi64(vDet[1], SIMD_T::setzero_si())));
-
- uint32_t cullZeroAreaMask = maskLo | (maskHi << (SIMD_WIDTH / 2)); // concatenate the two half-width masks into one per-lane mask
-
- // don't cull degenerate triangles if we're conservatively rasterizing
- uint32_t origTriMask = triMask;
- if (rastState.fillMode == SWR_FILLMODE_SOLID && !CT::IsConservativeT::value)
- {
- triMask &= ~cullZeroAreaMask;
- }
-
- // determine front winding tris
- // CW +det
- // CCW det < 0;
- // 0 area triangles are marked as backfacing regardless of winding order,
- // which is required behavior for conservative rast and wireframe rendering
- uint32_t frontWindingTris;
- if (rastState.frontWinding == SWR_FRONTWINDING_CW)
- {
- maskLo = SIMD_T::movemask_pd(
- SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[0], SIMD_T::setzero_si())));
- maskHi = SIMD_T::movemask_pd(
- SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[1], SIMD_T::setzero_si())));
- }
- else
- {
- maskLo = SIMD_T::movemask_pd(
- SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(SIMD_T::setzero_si(), vDet[0])));
- maskHi = SIMD_T::movemask_pd(
- SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(SIMD_T::setzero_si(), vDet[1])));
- }
- frontWindingTris = maskLo | (maskHi << (SIMD_WIDTH / 2));
-
- // cull
- uint32_t cullTris;
- switch ((SWR_CULLMODE)rastState.cullMode)
- {
- case SWR_CULLMODE_BOTH:
- cullTris = 0xffffffff;
- break;
- case SWR_CULLMODE_NONE:
- cullTris = 0x0;
- break;
- case SWR_CULLMODE_FRONT:
- cullTris = frontWindingTris;
- break;
- // 0 area triangles are marked as backfacing, which is required behavior for conservative
- // rast
- case SWR_CULLMODE_BACK:
- cullTris = ~frontWindingTris;
- break;
- default:
- SWR_INVALID("Invalid cull mode: %d", rastState.cullMode);
- cullTris = 0x0;
- break;
- }
-
- triMask &= ~cullTris;
-
- if (origTriMask ^ triMask)
- {
- RDTSC_EVENT(pDC->pContext->pBucketMgr,
- FECullZeroAreaAndBackface,
- _mm_popcnt_u32(origTriMask ^ triMask),
- 0);
- }
-
- AR_EVENT(CullInfoEvent(pDC->drawId, cullZeroAreaMask, cullTris, origTriMask));
-
- /// Note: these variable initializations must stay above any 'goto endBinTriangles'
- // compute per tri backface
- uint32_t frontFaceMask = frontWindingTris;
- uint32_t* pPrimID = (uint32_t*)&primID;
- const uint32_t* pViewportIndex = (uint32_t*)&viewportIdx;
- uint32_t triIndex = 0;
-
- uint32_t edgeEnable; // assigned only when CT::IsConservativeT::value is true; read only under that same condition in the binning loop
- PFN_WORK_FUNC pfnWork; // assigned only when CT::IsConservativeT::value is false; read only under that same condition in the binning loop
- if (CT::IsConservativeT::value)
- {
- // determine which edges of the degenerate tri, if any, are valid to rasterize.
- // used to call the appropriate templated rasterizer function
- if (cullZeroAreaMask > 0)
- {
- // e0 = v1-v0
- const Integer<SIMD_T> x0x1Mask = SIMD_T::cmpeq_epi32(vXi[0], vXi[1]);
- const Integer<SIMD_T> y0y1Mask = SIMD_T::cmpeq_epi32(vYi[0], vYi[1]);
-
- uint32_t e0Mask =
- SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(x0x1Mask, y0y1Mask)));
-
- // e1 = v2-v1
- const Integer<SIMD_T> x1x2Mask = SIMD_T::cmpeq_epi32(vXi[1], vXi[2]);
- const Integer<SIMD_T> y1y2Mask = SIMD_T::cmpeq_epi32(vYi[1], vYi[2]);
-
- uint32_t e1Mask =
- SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(x1x2Mask, y1y2Mask)));
-
- // e2 = v0-v2
- // if v0 == v1 & v1 == v2, v0 == v2
- uint32_t e2Mask = e0Mask & e1Mask;
- SWR_ASSERT(KNOB_SIMD_WIDTH == 8, "Need to update degenerate mask code for avx512"); // the pdep constants below only cover 8 lanes x 3 edge bits = 24 bits
-
- // edge order: e0 = v0v1, e1 = v1v2, e2 = v0v2
- // 32 bit binary: 0000 0000 0010 0100 1001 0010 0100 1001
- e0Mask = pdep_u32(e0Mask, 0x00249249);
-
- // 32 bit binary: 0000 0000 0100 1001 0010 0100 1001 0010
- e1Mask = pdep_u32(e1Mask, 0x00492492);
-
- // 32 bit binary: 0000 0000 1001 0010 0100 1001 0010 0100
- e2Mask = pdep_u32(e2Mask, 0x00924924);
-
- edgeEnable = (0x00FFFFFF & (~(e0Mask | e1Mask | e2Mask))); // 3 bits per triangle; a cleared bit marks a zero-length edge
- }
- else
- {
- edgeEnable = 0x00FFFFFF;
- }
- }
- else
- {
- // degenerate triangles won't be sent to rasterizer; just enable all edges
- pfnWork = GetRasterizerFunc(rastState.sampleCount,
- rastState.bIsCenterPattern,
- (rastState.conservativeRast > 0),
- (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage,
- EdgeValToEdgeState(ALL_EDGES_VALID),
- (state.scissorsTileAligned == false));
- }
-
- SIMDBBOX_T<SIMD_T> bbox;
-
- if (!triMask)
- {
- goto endBinTriangles; // nothing survived culling; the label path records RDTSC_END and returns
- }
-
- // Calc bounding box of triangles
- calcBoundingBoxIntVertical<SIMD_T, CT>(vXi, vYi, bbox);
-
- // determine if triangle falls between pixel centers and discard
- // only discard for non-MSAA case and when conservative rast is disabled
- // (xmin + 127) & ~255
- // (xmax + 128) & ~255
- if ((rastState.sampleCount == SWR_MULTISAMPLE_1X || rastState.bIsCenterPattern) &&
- (!CT::IsConservativeT::value))
- {
- origTriMask = triMask;
-
- int cullCenterMask;
-
- {
- Integer<SIMD_T> xmin = SIMD_T::add_epi32(bbox.xmin, SIMD_T::set1_epi32(127));
- xmin = SIMD_T::and_si(xmin, SIMD_T::set1_epi32(~255));
- Integer<SIMD_T> xmax = SIMD_T::add_epi32(bbox.xmax, SIMD_T::set1_epi32(128));
- xmax = SIMD_T::and_si(xmax, SIMD_T::set1_epi32(~255));
-
- Integer<SIMD_T> vMaskH = SIMD_T::cmpeq_epi32(xmin, xmax);
-
- Integer<SIMD_T> ymin = SIMD_T::add_epi32(bbox.ymin, SIMD_T::set1_epi32(127));
- ymin = SIMD_T::and_si(ymin, SIMD_T::set1_epi32(~255));
- Integer<SIMD_T> ymax = SIMD_T::add_epi32(bbox.ymax, SIMD_T::set1_epi32(128));
- ymax = SIMD_T::and_si(ymax, SIMD_T::set1_epi32(~255));
-
- Integer<SIMD_T> vMaskV = SIMD_T::cmpeq_epi32(ymin, ymax);
-
- vMaskV = SIMD_T::or_si(vMaskH, vMaskV); // cull when the rounded min and max coincide in either X or Y
- cullCenterMask = SIMD_T::movemask_ps(SIMD_T::castsi_ps(vMaskV));
- }
-
- triMask &= ~cullCenterMask;
-
- if (origTriMask ^ triMask)
- {
- RDTSC_EVENT(pDC->pContext->pBucketMgr,
- FECullBetweenCenters,
- _mm_popcnt_u32(origTriMask ^ triMask),
- 0);
- }
- }
-
- // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is
- // exclusive. Gather the AOS effective scissor rects based on the per-prim VP index.
- /// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
- {
- Integer<SIMD_T> scisXmin, scisYmin, scisXmax, scisYmax;
- if (pa.viewportArrayActive)
-
- {
- GatherScissors(&state.scissorsInFixedPoint[0],
- pViewportIndex,
- scisXmin,
- scisYmin,
- scisXmax,
- scisYmax);
- }
- else // broadcast fast path for non-VPAI case.
- {
- scisXmin = SIMD_T::set1_epi32(state.scissorsInFixedPoint[0].xmin);
- scisYmin = SIMD_T::set1_epi32(state.scissorsInFixedPoint[0].ymin);
- scisXmax = SIMD_T::set1_epi32(state.scissorsInFixedPoint[0].xmax);
- scisYmax = SIMD_T::set1_epi32(state.scissorsInFixedPoint[0].ymax);
- }
-
- // Make triangle bbox inclusive
- bbox.xmax = SIMD_T::sub_epi32(bbox.xmax, SIMD_T::set1_epi32(1));
- bbox.ymax = SIMD_T::sub_epi32(bbox.ymax, SIMD_T::set1_epi32(1));
-
- bbox.xmin = SIMD_T::max_epi32(bbox.xmin, scisXmin);
- bbox.ymin = SIMD_T::max_epi32(bbox.ymin, scisYmin);
- bbox.xmax = SIMD_T::min_epi32(bbox.xmax, scisXmax);
- bbox.ymax = SIMD_T::min_epi32(bbox.ymax, scisYmax);
- }
-
- if (CT::IsConservativeT::value)
- {
- // in the case where a degenerate triangle is on a scissor edge, we need to make sure the
- // primitive bbox has some area. Bump the xmax/ymax edges out
-
- Integer<SIMD_T> topEqualsBottom = SIMD_T::cmpeq_epi32(bbox.ymin, bbox.ymax);
- bbox.ymax = SIMD_T::blendv_epi32(
- bbox.ymax, SIMD_T::add_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), topEqualsBottom);
-
- Integer<SIMD_T> leftEqualsRight = SIMD_T::cmpeq_epi32(bbox.xmin, bbox.xmax);
- bbox.xmax = SIMD_T::blendv_epi32(
- bbox.xmax, SIMD_T::add_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), leftEqualsRight);
- }
-
- // Cull tris completely outside scissor (after clamping, min > max means the bbox and scissor do not overlap)
- {
- Integer<SIMD_T> maskOutsideScissorX = SIMD_T::cmpgt_epi32(bbox.xmin, bbox.xmax);
- Integer<SIMD_T> maskOutsideScissorY = SIMD_T::cmpgt_epi32(bbox.ymin, bbox.ymax);
- Integer<SIMD_T> maskOutsideScissorXY =
- SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
- uint32_t maskOutsideScissor = SIMD_T::movemask_ps(SIMD_T::castsi_ps(maskOutsideScissorXY));
- triMask = triMask & ~maskOutsideScissor;
- }
-
-#if KNOB_ENABLE_EARLY_RAST
- if (rastState.sampleCount == SWR_MULTISAMPLE_1X && !CT::IsConservativeT::value)
- {
- // Try early rasterization - culling small triangles which do not cover any pixels
-
- // convert to ER tiles
- SIMDBBOX_T<SIMD_T> er_bbox;
-
- er_bbox.xmin =
- SIMD_T::template srai_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(bbox.xmin);
- er_bbox.xmax =
- SIMD_T::template srai_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(bbox.xmax);
- er_bbox.ymin =
- SIMD_T::template srai_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(bbox.ymin);
- er_bbox.ymax =
- SIMD_T::template srai_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(bbox.ymax);
-
- Integer<SIMD_T> vTileX = SIMD_T::cmpeq_epi32(er_bbox.xmin, er_bbox.xmax);
- Integer<SIMD_T> vTileY = SIMD_T::cmpeq_epi32(er_bbox.ymin, er_bbox.ymax);
-
- // Take only triangles that fit into ER tile
- uint32_t oneTileMask =
- triMask & SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(vTileX, vTileY)));
-
- if (oneTileMask)
- {
- // determine CW tris (det > 0)
- uint32_t maskCwLo = SIMD_T::movemask_pd(
- SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[0], SIMD_T::setzero_si())));
- uint32_t maskCwHi = SIMD_T::movemask_pd(
- SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[1], SIMD_T::setzero_si())));
- uint32_t cwTrisMask = maskCwLo | (maskCwHi << (SIMD_WIDTH / 2));
-
- // Try early rasterization (vAi/vBi are passed by non-const reference and are not read again in this function afterwards)
- triMask = EarlyRasterizer<SIMD_T, SIMD_WIDTH, CT>(
- pDC, er_bbox, vAi, vBi, vXi, vYi, cwTrisMask, triMask, oneTileMask);
-
- if (!triMask)
- {
- RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
- return;
- }
- }
- }
-#endif
-
-endBinTriangles:
-
-
- if (!triMask)
- {
- RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
- return;
- }
-
- // Send surviving triangles to the line or point binner based on fill mode
- if (rastState.fillMode == SWR_FILLMODE_WIREFRAME)
- {
- // Simple non-conformant wireframe mode, useful for debugging
- // construct 3 SIMD lines out of the triangle and call the line binner for each SIMD
- Vec4<SIMD_T> line[2];
- Float<SIMD_T> recipW[2];
-
- line[0] = tri[0];
- line[1] = tri[1];
- recipW[0] = vRecipW0;
- recipW[1] = vRecipW1;
-
- BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(
- pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
-
- line[0] = tri[1];
- line[1] = tri[2];
- recipW[0] = vRecipW1;
- recipW[1] = vRecipW2;
-
- BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(
- pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
-
- line[0] = tri[2];
- line[1] = tri[0];
- recipW[0] = vRecipW2;
- recipW[1] = vRecipW0;
-
- BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(
- pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
-
- RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
- return;
- }
- else if (rastState.fillMode == SWR_FILLMODE_POINT)
- {
- // Bin 3 points (one call per triangle vertex)
- BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(
- pDC, pa, workerId, &tri[0], triMask, primID, viewportIdx, rtIdx);
- BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(
- pDC, pa, workerId, &tri[1], triMask, primID, viewportIdx, rtIdx);
- BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(
- pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx, rtIdx);
-
- RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
- return;
- }
-
- // Convert triangle bbox to macrotile units.
- bbox.xmin = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(bbox.xmin);
- bbox.ymin = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(bbox.ymin);
- bbox.xmax = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(bbox.xmax);
- bbox.ymax = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(bbox.ymax);
-
- OSALIGNSIMD16(uint32_t)
- aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
-
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTLeft), bbox.xmin);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTRight), bbox.xmax);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTTop), bbox.ymin);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTBottom), bbox.ymax);
-
- // transpose verts needed for backend
- /// @todo modify BE to take non-transformed verts
- OSALIGNSIMD16(simd4scalar) vHorizX[SIMD_WIDTH];
- OSALIGNSIMD16(simd4scalar) vHorizY[SIMD_WIDTH];
- OSALIGNSIMD16(simd4scalar) vHorizZ[SIMD_WIDTH];
- OSALIGNSIMD16(simd4scalar) vHorizW[SIMD_WIDTH];
-
- TransposeVertices(vHorizX, tri[0].x, tri[1].x, tri[2].x);
- TransposeVertices(vHorizY, tri[0].y, tri[1].y, tri[2].y);
- TransposeVertices(vHorizZ, tri[0].z, tri[1].z, tri[2].z);
- TransposeVertices(vHorizW, vRecipW0, vRecipW1, vRecipW2);
-
- // scan remaining valid triangles and bin each separately
- while (_BitScanForward((unsigned long*)&triIndex, triMask))
- {
- uint32_t linkageCount = state.backendState.numAttributes;
- uint32_t numScalarAttribs = linkageCount * 4; // presumably 4 scalar components per attribute - confirm
-
- BE_WORK work;
- work.type = DRAW;
-
- bool isDegenerate;
- if (CT::IsConservativeT::value)
- {
- // only rasterize valid edges if we have a degenerate primitive
- int32_t triEdgeEnable = (edgeEnable >> (triIndex * 3)) & ALL_EDGES_VALID;
- work.pfnWork =
- GetRasterizerFunc(rastState.sampleCount,
- rastState.bIsCenterPattern,
- (rastState.conservativeRast > 0),
- (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage,
- EdgeValToEdgeState(triEdgeEnable),
- (state.scissorsTileAligned == false));
-
- // Degenerate triangles are required to be constant interpolated
- isDegenerate = (triEdgeEnable != ALL_EDGES_VALID) ? true : false;
- }
- else
- {
- isDegenerate = false;
- work.pfnWork = pfnWork; // rasterizer function selected once before the loop
- }
-
- // Select attribute processor
- PFN_PROCESS_ATTRIBUTES pfnProcessAttribs =
- GetProcessAttributesFunc(3,
- state.backendState.swizzleEnable,
- state.backendState.constantInterpolationMask,
- isDegenerate);
-
- TRIANGLE_WORK_DESC& desc = work.desc.tri;
-
- desc.triFlags.frontFacing = state.forceFront ? 1 : ((frontFaceMask >> triIndex) & 1);
- desc.triFlags.renderTargetArrayIndex = aRTAI[triIndex];
- desc.triFlags.viewportIndex = pViewportIndex[triIndex];
-
- auto pArena = pDC->pArena;
- SWR_ASSERT(pArena != nullptr);
-
- // store active attribs (buffer is sized for numScalarAttribs floats for each of the 3 vertices)
- float* pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
- desc.pAttribs = pAttribs;
- desc.numAttribs = linkageCount;
- pfnProcessAttribs(pDC, pa, triIndex, pPrimID[triIndex], desc.pAttribs);
-
- // store triangle vertex data (four groups of 4 floats: X, Y, Z, then reciprocal W)
- desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16);
-
- SIMD128::store_ps(&desc.pTriBuffer[0], vHorizX[triIndex]);
- SIMD128::store_ps(&desc.pTriBuffer[4], vHorizY[triIndex]);
- SIMD128::store_ps(&desc.pTriBuffer[8], vHorizZ[triIndex]);
- SIMD128::store_ps(&desc.pTriBuffer[12], vHorizW[triIndex]);
-
- // store user clip distances
- if (state.backendState.clipDistanceMask)
- {
- uint32_t numClipDist = _mm_popcnt_u32(state.backendState.clipDistanceMask);
- desc.pUserClipBuffer = (float*)pArena->Alloc(numClipDist * 3 * sizeof(float));
- ProcessUserClipDist<3>(
- state.backendState, pa, triIndex, &desc.pTriBuffer[12], desc.pUserClipBuffer); // &pTriBuffer[12] is the reciprocal-W group stored just above
- }
-
- for (uint32_t y = aMTTop[triIndex]; y <= aMTBottom[triIndex]; ++y) // enqueue the same work item on every macrotile the bbox overlaps (bounds are inclusive)
- {
- for (uint32_t x = aMTLeft[triIndex]; x <= aMTRight[triIndex]; ++x)
- {
-#if KNOB_ENABLE_TOSS_POINTS
- if (!KNOB_TOSS_SETUP_TRIS)
-#endif
- {
- pTileMgr->enqueue(x, y, &work);
- }
- }
- }
-
- triMask &= ~(1 << triIndex); // clear the processed lane so the next _BitScanForward finds the following triangle
- }
-
- RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
-}
-
-template <typename CT>
-void BinTriangles(DRAW_CONTEXT* pDC, // SIMD256 entry point: forwards every argument unchanged to BinTrianglesImpl
- PA_STATE& pa,
- uint32_t workerId,
- simdvector tri[3],
- uint32_t triMask,
- simdscalari const& primID,
- simdscalari const& viewportIdx,
- simdscalari const& rtIdx)
-{
- BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>( // instantiated with SIMD256 and KNOB_SIMD_WIDTH
- pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx);
-}
-
-#if USE_SIMD16_FRONTEND
-template <typename CT>
-void SIMDCALL BinTriangles_simd16(DRAW_CONTEXT* pDC, // SIMD512 entry point: forwards every argument unchanged to BinTrianglesImpl
- PA_STATE& pa,
- uint32_t workerId,
- simd16vector tri[3],
- uint32_t triMask,
- simd16scalari const& primID,
- simd16scalari const& viewportIdx,
- simd16scalari const& rtIdx)
-{
- BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>( // instantiated with SIMD512 and KNOB_SIMD16_WIDTH
- pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx);
-}
-
-#endif
-struct FEBinTrianglesChooser // policy type handed to TemplateArgUnroller by GetBinTrianglesFunc
-{
- typedef PFN_PROCESS_PRIMS FuncType;
-
- template <typename... ArgsB>
- static FuncType GetFunc() // returns the BinTriangles instantiation for ConservativeRastFETraits<ArgsB...>
- {
- return BinTriangles<ConservativeRastFETraits<ArgsB...>>;
- }
-};
-
-// Selector for correct templated BinTriangles function; the runtime IsConservative flag is forwarded to TemplateArgUnroller
-PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative)
-{
- return TemplateArgUnroller<FEBinTrianglesChooser>::GetFunc(IsConservative);
-}
-
-#if USE_SIMD16_FRONTEND
-struct FEBinTrianglesChooser_simd16
-{
- typedef PFN_PROCESS_PRIMS_SIMD16 FuncType;
-
- template <typename... ArgsB>
- static FuncType GetFunc()
- {
- return BinTriangles_simd16<ConservativeRastFETraits<ArgsB...>>;
- }
-};
-
-// Selector for correct templated BinTrinagles function
-PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative)
-{
- return TemplateArgUnroller<FEBinTrianglesChooser_simd16>::GetFunc(IsConservative);
-}
-
-#endif
-
-template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPostSetupPointsImpl(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- Vec4<SIMD_T> prim[],
- uint32_t primMask,
- Integer<SIMD_T> const& primID,
- Integer<SIMD_T> const& viewportIdx,
- Integer<SIMD_T> const& rtIdx)
-{
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEBinPoints, pDC->drawId);
-
- Vec4<SIMD_T>& primVerts = prim[0];
-
- const API_STATE& state = GetApiState(pDC);
- const SWR_RASTSTATE& rastState = state.rastState;
- const uint32_t* pViewportIndex = (uint32_t*)&viewportIdx;
-
- // Select attribute processor
- PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(
- 1, state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
-
- // convert to fixed point
- Integer<SIMD_T> vXi, vYi;
-
- vXi = fpToFixedPointVertical<SIMD_T>(primVerts.x);
- vYi = fpToFixedPointVertical<SIMD_T>(primVerts.y);
-
- if (CanUseSimplePoints(pDC))
- {
- // adjust for ymin-xmin rule
- vXi = SIMD_T::sub_epi32(vXi, SIMD_T::set1_epi32(1));
- vYi = SIMD_T::sub_epi32(vYi, SIMD_T::set1_epi32(1));
-
- // cull points off the ymin-xmin edge of the viewport
- primMask &= ~SIMD_T::movemask_ps(SIMD_T::castsi_ps(vXi));
- primMask &= ~SIMD_T::movemask_ps(SIMD_T::castsi_ps(vYi));
-
- // compute macro tile coordinates
- Integer<SIMD_T> macroX = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(vXi);
- Integer<SIMD_T> macroY = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(vYi);
-
- OSALIGNSIMD16(uint32_t) aMacroX[SIMD_WIDTH], aMacroY[SIMD_WIDTH];
-
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMacroX), macroX);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMacroY), macroY);
-
- // compute raster tile coordinates
- Integer<SIMD_T> rasterX =
- SIMD_T::template srai_epi32<KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT>(vXi);
- Integer<SIMD_T> rasterY =
- SIMD_T::template srai_epi32<KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT>(vYi);
-
- // compute raster tile relative x,y for coverage mask
- Integer<SIMD_T> tileAlignedX = SIMD_T::template slli_epi32<KNOB_TILE_X_DIM_SHIFT>(rasterX);
- Integer<SIMD_T> tileAlignedY = SIMD_T::template slli_epi32<KNOB_TILE_Y_DIM_SHIFT>(rasterY);
-
- Integer<SIMD_T> tileRelativeX =
- SIMD_T::sub_epi32(SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vXi), tileAlignedX);
- Integer<SIMD_T> tileRelativeY =
- SIMD_T::sub_epi32(SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vYi), tileAlignedY);
-
- OSALIGNSIMD16(uint32_t) aTileRelativeX[SIMD_WIDTH];
- OSALIGNSIMD16(uint32_t) aTileRelativeY[SIMD_WIDTH];
-
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aTileRelativeX), tileRelativeX);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aTileRelativeY), tileRelativeY);
-
- OSALIGNSIMD16(uint32_t) aTileAlignedX[SIMD_WIDTH];
- OSALIGNSIMD16(uint32_t) aTileAlignedY[SIMD_WIDTH];
-
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aTileAlignedX), tileAlignedX);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aTileAlignedY), tileAlignedY);
-
- OSALIGNSIMD16(float) aZ[SIMD_WIDTH];
- SIMD_T::store_ps(reinterpret_cast<float*>(aZ), primVerts.z);
-
- // store render target array index
- const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
-
- uint32_t* pPrimID = (uint32_t*)&primID;
- uint32_t primIndex = 0;
-
- const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
-
- // scan remaining valid triangles and bin each separately
- while (_BitScanForward((unsigned long*)&primIndex, primMask))
- {
- uint32_t linkageCount = backendState.numAttributes;
- uint32_t numScalarAttribs = linkageCount * 4;
-
- BE_WORK work;
- work.type = DRAW;
-
- TRIANGLE_WORK_DESC& desc = work.desc.tri;
-
- // points are always front facing
- desc.triFlags.frontFacing = 1;
- desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
- desc.triFlags.viewportIndex = pViewportIndex[primIndex];
-
- work.pfnWork = RasterizeSimplePoint;
-
- auto pArena = pDC->pArena;
- SWR_ASSERT(pArena != nullptr);
-
- // store attributes
- float* pAttribs =
- (float*)pArena->AllocAligned(3 * numScalarAttribs * sizeof(float), 16);
- desc.pAttribs = pAttribs;
- desc.numAttribs = linkageCount;
-
- pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], pAttribs);
-
- // store raster tile aligned x, y, perspective correct z
- float* pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
- desc.pTriBuffer = pTriBuffer;
- *(uint32_t*)pTriBuffer++ = aTileAlignedX[primIndex];
- *(uint32_t*)pTriBuffer++ = aTileAlignedY[primIndex];
- *pTriBuffer = aZ[primIndex];
-
- uint32_t tX = aTileRelativeX[primIndex];
- uint32_t tY = aTileRelativeY[primIndex];
-
- // pack the relative x,y into the coverageMask, the rasterizer will
- // generate the true coverage mask from it
- work.desc.tri.triFlags.coverageMask = tX | (tY << 4);
-
- // bin it
- MacroTileMgr* pTileMgr = pDC->pTileMgr;
-#if KNOB_ENABLE_TOSS_POINTS
- if (!KNOB_TOSS_SETUP_TRIS)
-#endif
- {
- pTileMgr->enqueue(aMacroX[primIndex], aMacroY[primIndex], &work);
- }
-
- primMask &= ~(1 << primIndex);
- }
- }
- else
- {
- // non simple points need to be potentially binned to multiple macro tiles
- Float<SIMD_T> vPointSize;
-
- if (rastState.pointParam)
- {
- Vec4<SIMD_T> size[3];
- pa.Assemble(VERTEX_SGV_SLOT, size);
- vPointSize = size[0][VERTEX_SGV_POINT_SIZE_COMP];
- }
- else
- {
- vPointSize = SIMD_T::set1_ps(rastState.pointSize);
- }
-
- // bloat point to bbox
- SIMDBBOX_T<SIMD_T> bbox;
-
- bbox.xmin = bbox.xmax = vXi;
- bbox.ymin = bbox.ymax = vYi;
-
- Float<SIMD_T> vHalfWidth = SIMD_T::mul_ps(vPointSize, SIMD_T::set1_ps(0.5f));
- Integer<SIMD_T> vHalfWidthi = fpToFixedPointVertical<SIMD_T>(vHalfWidth);
-
- bbox.xmin = SIMD_T::sub_epi32(bbox.xmin, vHalfWidthi);
- bbox.xmax = SIMD_T::add_epi32(bbox.xmax, vHalfWidthi);
- bbox.ymin = SIMD_T::sub_epi32(bbox.ymin, vHalfWidthi);
- bbox.ymax = SIMD_T::add_epi32(bbox.ymax, vHalfWidthi);
-
- // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge
- // is exclusive. Gather the AOS effective scissor rects based on the per-prim VP index.
- /// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
- {
- Integer<SIMD_T> scisXmin, scisYmin, scisXmax, scisYmax;
-
- if (pa.viewportArrayActive)
- {
- GatherScissors(&state.scissorsInFixedPoint[0],
- pViewportIndex,
- scisXmin,
- scisYmin,
- scisXmax,
- scisYmax);
- }
- else // broadcast fast path for non-VPAI case.
- {
- scisXmin = SIMD_T::set1_epi32(state.scissorsInFixedPoint[0].xmin);
- scisYmin = SIMD_T::set1_epi32(state.scissorsInFixedPoint[0].ymin);
- scisXmax = SIMD_T::set1_epi32(state.scissorsInFixedPoint[0].xmax);
- scisYmax = SIMD_T::set1_epi32(state.scissorsInFixedPoint[0].ymax);
- }
-
- bbox.xmin = SIMD_T::max_epi32(bbox.xmin, scisXmin);
- bbox.ymin = SIMD_T::max_epi32(bbox.ymin, scisYmin);
- bbox.xmax =
- SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), scisXmax);
- bbox.ymax =
- SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), scisYmax);
- }
-
- // Cull bloated points completely outside scissor
- Integer<SIMD_T> maskOutsideScissorX = SIMD_T::cmpgt_epi32(bbox.xmin, bbox.xmax);
- Integer<SIMD_T> maskOutsideScissorY = SIMD_T::cmpgt_epi32(bbox.ymin, bbox.ymax);
- Integer<SIMD_T> maskOutsideScissorXY =
- SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
- uint32_t maskOutsideScissor = SIMD_T::movemask_ps(SIMD_T::castsi_ps(maskOutsideScissorXY));
- primMask = primMask & ~maskOutsideScissor;
-
- // Convert bbox to macrotile units.
- bbox.xmin = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(bbox.xmin);
- bbox.ymin = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(bbox.ymin);
- bbox.xmax = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(bbox.xmax);
- bbox.ymax = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(bbox.ymax);
-
- OSALIGNSIMD16(uint32_t)
- aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
-
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTLeft), bbox.xmin);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTRight), bbox.xmax);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTTop), bbox.ymin);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTBottom), bbox.ymax);
-
- // store render target array index
- const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
-
- OSALIGNSIMD16(float) aPointSize[SIMD_WIDTH];
- SIMD_T::store_ps(reinterpret_cast<float*>(aPointSize), vPointSize);
-
- uint32_t* pPrimID = (uint32_t*)&primID;
-
- OSALIGNSIMD16(float) aPrimVertsX[SIMD_WIDTH];
- OSALIGNSIMD16(float) aPrimVertsY[SIMD_WIDTH];
- OSALIGNSIMD16(float) aPrimVertsZ[SIMD_WIDTH];
-
- SIMD_T::store_ps(reinterpret_cast<float*>(aPrimVertsX), primVerts.x);
- SIMD_T::store_ps(reinterpret_cast<float*>(aPrimVertsY), primVerts.y);
- SIMD_T::store_ps(reinterpret_cast<float*>(aPrimVertsZ), primVerts.z);
-
- // scan remaining valid prims and bin each separately
- const SWR_BACKEND_STATE& backendState = state.backendState;
- uint32_t primIndex;
- while (_BitScanForward((unsigned long*)&primIndex, primMask))
- {
- uint32_t linkageCount = backendState.numAttributes;
- uint32_t numScalarAttribs = linkageCount * 4;
-
- BE_WORK work;
- work.type = DRAW;
-
- TRIANGLE_WORK_DESC& desc = work.desc.tri;
-
- desc.triFlags.frontFacing = 1;
- desc.triFlags.pointSize = aPointSize[primIndex];
- desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
- desc.triFlags.viewportIndex = pViewportIndex[primIndex];
-
- work.pfnWork = RasterizeTriPoint;
-
- auto pArena = pDC->pArena;
- SWR_ASSERT(pArena != nullptr);
-
- // store active attribs
- desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
- desc.numAttribs = linkageCount;
- pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs);
-
- // store point vertex data
- float* pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
- desc.pTriBuffer = pTriBuffer;
- *pTriBuffer++ = aPrimVertsX[primIndex];
- *pTriBuffer++ = aPrimVertsY[primIndex];
- *pTriBuffer = aPrimVertsZ[primIndex];
-
- // store user clip distances
- if (backendState.clipDistanceMask)
- {
- uint32_t numClipDist = _mm_popcnt_u32(backendState.clipDistanceMask);
- desc.pUserClipBuffer = (float*)pArena->Alloc(numClipDist * 3 * sizeof(float));
- float dists[8];
- float one = 1.0f;
- ProcessUserClipDist<1>(backendState, pa, primIndex, &one, dists);
- for (uint32_t i = 0; i < numClipDist; i++)
- {
- desc.pUserClipBuffer[3 * i + 0] = 0.0f;
- desc.pUserClipBuffer[3 * i + 1] = 0.0f;
- desc.pUserClipBuffer[3 * i + 2] = dists[i];
- }
- }
-
- MacroTileMgr* pTileMgr = pDC->pTileMgr;
- for (uint32_t y = aMTTop[primIndex]; y <= aMTBottom[primIndex]; ++y)
- {
- for (uint32_t x = aMTLeft[primIndex]; x <= aMTRight[primIndex]; ++x)
- {
-#if KNOB_ENABLE_TOSS_POINTS
- if (!KNOB_TOSS_SETUP_TRIS)
-#endif
- {
- pTileMgr->enqueue(x, y, &work);
- }
- }
- }
-
- primMask &= ~(1 << primIndex);
- }
- }
-
- RDTSC_END(pDC->pContext->pBucketMgr, FEBinPoints, 1);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Bin SIMD points to the backend. Only supports point size of 1
-/// @param pDC - pointer to draw context.
-/// @param pa - The primitive assembly object.
-/// @param workerId - thread's worker id. Even thread has a unique id.
-/// @param tri - Contains point position data for SIMDs worth of points.
-/// @param primID - Primitive ID for each point.
-template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPointsImpl(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- Vec4<SIMD_T> prim[3],
- uint32_t primMask,
- Integer<SIMD_T> const& primID,
- Integer<SIMD_T> const& viewportIdx,
- Integer<SIMD_T> const& rtIdx)
-{
- const API_STATE& state = GetApiState(pDC);
- const SWR_FRONTEND_STATE& feState = state.frontendState;
- const SWR_RASTSTATE& rastState = state.rastState;
-
- if (!feState.vpTransformDisable)
- {
- // perspective divide
- Float<SIMD_T> vRecipW0 = SIMD_T::div_ps(SIMD_T::set1_ps(1.0f), prim[0].w);
-
- prim[0].x = SIMD_T::mul_ps(prim[0].x, vRecipW0);
- prim[0].y = SIMD_T::mul_ps(prim[0].y, vRecipW0);
- prim[0].z = SIMD_T::mul_ps(prim[0].z, vRecipW0);
-
- // viewport transform to screen coords
- if (pa.viewportArrayActive)
- {
- viewportTransform<1>(prim, state.vpMatrices, viewportIdx);
- }
- else
- {
- viewportTransform<1>(prim, state.vpMatrices);
- }
- }
-
- Float<SIMD_T> offset = SwrPixelOffsets<SIMD_T>::GetOffset(rastState.pixelLocation);
-
- prim[0].x = SIMD_T::add_ps(prim[0].x, offset);
- prim[0].y = SIMD_T::add_ps(prim[0].y, offset);
-
- BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(
- pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
-}
-
-void BinPoints(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simdvector prim[3],
- uint32_t primMask,
- simdscalari const& primID,
- simdscalari const& viewportIdx,
- simdscalari const& rtIdx)
-{
- BinPointsImpl<SIMD256, KNOB_SIMD_WIDTH>(
- pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
-}
-
-#if USE_SIMD16_FRONTEND
-void SIMDCALL BinPoints_simd16(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simd16vector prim[3],
- uint32_t primMask,
- simd16scalari const& primID,
- simd16scalari const& viewportIdx,
- simd16scalari const& rtIdx)
-{
- BinPointsImpl<SIMD512, KNOB_SIMD16_WIDTH>(
- pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
-}
-
-#endif
-//////////////////////////////////////////////////////////////////////////
-/// @brief Bin SIMD lines to the backend.
-/// @param pDC - pointer to draw context.
-/// @param pa - The primitive assembly object.
-/// @param workerId - thread's worker id. Even thread has a unique id.
-/// @param tri - Contains line position data for SIMDs worth of points.
-/// @param primID - Primitive ID for each line.
-/// @param viewportIdx - Viewport Array Index for each line.
-template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPostSetupLinesImpl(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- Vec4<SIMD_T> prim[],
- Float<SIMD_T> recipW[],
- uint32_t primMask,
- Integer<SIMD_T> const& primID,
- Integer<SIMD_T> const& viewportIdx,
- Integer<SIMD_T> const& rtIdx)
-{
- const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEBinLines, pDC->drawId);
-
- const API_STATE& state = GetApiState(pDC);
- const SWR_RASTSTATE& rastState = state.rastState;
-
- // Select attribute processor
- PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(
- 2, state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
-
- Float<SIMD_T>& vRecipW0 = recipW[0];
- Float<SIMD_T>& vRecipW1 = recipW[1];
-
- // convert to fixed point
- Integer<SIMD_T> vXi[2], vYi[2];
-
- vXi[0] = fpToFixedPointVertical<SIMD_T>(prim[0].x);
- vYi[0] = fpToFixedPointVertical<SIMD_T>(prim[0].y);
- vXi[1] = fpToFixedPointVertical<SIMD_T>(prim[1].x);
- vYi[1] = fpToFixedPointVertical<SIMD_T>(prim[1].y);
-
- // compute x-major vs y-major mask
- Integer<SIMD_T> xLength = SIMD_T::abs_epi32(SIMD_T::sub_epi32(vXi[0], vXi[1]));
- Integer<SIMD_T> yLength = SIMD_T::abs_epi32(SIMD_T::sub_epi32(vYi[0], vYi[1]));
- Float<SIMD_T> vYmajorMask = SIMD_T::castsi_ps(SIMD_T::cmpgt_epi32(yLength, xLength));
- uint32_t yMajorMask = SIMD_T::movemask_ps(vYmajorMask);
-
- // cull zero-length lines
- Integer<SIMD_T> vZeroLengthMask = SIMD_T::cmpeq_epi32(xLength, SIMD_T::setzero_si());
- vZeroLengthMask =
- SIMD_T::and_si(vZeroLengthMask, SIMD_T::cmpeq_epi32(yLength, SIMD_T::setzero_si()));
-
- primMask &= ~SIMD_T::movemask_ps(SIMD_T::castsi_ps(vZeroLengthMask));
-
- uint32_t* pPrimID = (uint32_t*)&primID;
- const uint32_t* pViewportIndex = (uint32_t*)&viewportIdx;
-
- // Calc bounding box of lines
- SIMDBBOX_T<SIMD_T> bbox;
- bbox.xmin = SIMD_T::min_epi32(vXi[0], vXi[1]);
- bbox.xmax = SIMD_T::max_epi32(vXi[0], vXi[1]);
- bbox.ymin = SIMD_T::min_epi32(vYi[0], vYi[1]);
- bbox.ymax = SIMD_T::max_epi32(vYi[0], vYi[1]);
-
- // bloat bbox by line width along minor axis
- Float<SIMD_T> vHalfWidth = SIMD_T::set1_ps(rastState.lineWidth / 2.0f);
- Integer<SIMD_T> vHalfWidthi = fpToFixedPointVertical<SIMD_T>(vHalfWidth);
-
- SIMDBBOX_T<SIMD_T> bloatBox;
-
- bloatBox.xmin = SIMD_T::sub_epi32(bbox.xmin, vHalfWidthi);
- bloatBox.xmax = SIMD_T::add_epi32(bbox.xmax, vHalfWidthi);
- bloatBox.ymin = SIMD_T::sub_epi32(bbox.ymin, vHalfWidthi);
- bloatBox.ymax = SIMD_T::add_epi32(bbox.ymax, vHalfWidthi);
-
- bbox.xmin = SIMD_T::blendv_epi32(bbox.xmin, bloatBox.xmin, vYmajorMask);
- bbox.xmax = SIMD_T::blendv_epi32(bbox.xmax, bloatBox.xmax, vYmajorMask);
- bbox.ymin = SIMD_T::blendv_epi32(bloatBox.ymin, bbox.ymin, vYmajorMask);
- bbox.ymax = SIMD_T::blendv_epi32(bloatBox.ymax, bbox.ymax, vYmajorMask);
-
- // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is
- // exclusive.
- {
- Integer<SIMD_T> scisXmin, scisYmin, scisXmax, scisYmax;
-
- if (pa.viewportArrayActive)
- {
- GatherScissors(&state.scissorsInFixedPoint[0],
- pViewportIndex,
- scisXmin,
- scisYmin,
- scisXmax,
- scisYmax);
- }
- else // broadcast fast path for non-VPAI case.
- {
- scisXmin = SIMD_T::set1_epi32(state.scissorsInFixedPoint[0].xmin);
- scisYmin = SIMD_T::set1_epi32(state.scissorsInFixedPoint[0].ymin);
- scisXmax = SIMD_T::set1_epi32(state.scissorsInFixedPoint[0].xmax);
- scisYmax = SIMD_T::set1_epi32(state.scissorsInFixedPoint[0].ymax);
- }
-
- bbox.xmin = SIMD_T::max_epi32(bbox.xmin, scisXmin);
- bbox.ymin = SIMD_T::max_epi32(bbox.ymin, scisYmin);
- bbox.xmax =
- SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), scisXmax);
- bbox.ymax =
- SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), scisYmax);
- }
-
- // Cull prims completely outside scissor
- {
- Integer<SIMD_T> maskOutsideScissorX = SIMD_T::cmpgt_epi32(bbox.xmin, bbox.xmax);
- Integer<SIMD_T> maskOutsideScissorY = SIMD_T::cmpgt_epi32(bbox.ymin, bbox.ymax);
- Integer<SIMD_T> maskOutsideScissorXY =
- SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
- uint32_t maskOutsideScissor = SIMD_T::movemask_ps(SIMD_T::castsi_ps(maskOutsideScissorXY));
- primMask = primMask & ~maskOutsideScissor;
- }
-
- // transpose verts needed for backend
- /// @todo modify BE to take non-transformed verts
- OSALIGNSIMD16(simd4scalar) vHorizX[SIMD_WIDTH];
- OSALIGNSIMD16(simd4scalar) vHorizY[SIMD_WIDTH];
- OSALIGNSIMD16(simd4scalar) vHorizZ[SIMD_WIDTH];
- OSALIGNSIMD16(simd4scalar) vHorizW[SIMD_WIDTH];
-
- if (!primMask)
- {
- goto endBinLines;
- }
-
- // Convert triangle bbox to macrotile units.
- bbox.xmin = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(bbox.xmin);
- bbox.ymin = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(bbox.ymin);
- bbox.xmax = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(bbox.xmax);
- bbox.ymax = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(bbox.ymax);
-
- OSALIGNSIMD16(uint32_t)
- aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
-
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTLeft), bbox.xmin);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTRight), bbox.xmax);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTTop), bbox.ymin);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTBottom), bbox.ymax);
-
- TransposeVertices(vHorizX, prim[0].x, prim[1].x, SIMD_T::setzero_ps());
- TransposeVertices(vHorizY, prim[0].y, prim[1].y, SIMD_T::setzero_ps());
- TransposeVertices(vHorizZ, prim[0].z, prim[1].z, SIMD_T::setzero_ps());
- TransposeVertices(vHorizW, vRecipW0, vRecipW1, SIMD_T::setzero_ps());
-
- // scan remaining valid prims and bin each separately
- unsigned long primIndex;
- while (_BitScanForward(&primIndex, primMask))
- {
- uint32_t linkageCount = state.backendState.numAttributes;
- uint32_t numScalarAttribs = linkageCount * 4;
-
- BE_WORK work;
- work.type = DRAW;
-
- TRIANGLE_WORK_DESC& desc = work.desc.tri;
-
- desc.triFlags.frontFacing = 1;
- desc.triFlags.yMajor = (yMajorMask >> primIndex) & 1;
- desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
- desc.triFlags.viewportIndex = pViewportIndex[primIndex];
-
- work.pfnWork = RasterizeLine;
-
- auto pArena = pDC->pArena;
- SWR_ASSERT(pArena != nullptr);
-
- // store active attribs
- desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
- desc.numAttribs = linkageCount;
- pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs);
-
- // store line vertex data
- desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16);
-
- _mm_store_ps(&desc.pTriBuffer[0], vHorizX[primIndex]);
- _mm_store_ps(&desc.pTriBuffer[4], vHorizY[primIndex]);
- _mm_store_ps(&desc.pTriBuffer[8], vHorizZ[primIndex]);
- _mm_store_ps(&desc.pTriBuffer[12], vHorizW[primIndex]);
-
- // store user clip distances
- if (state.backendState.clipDistanceMask)
- {
- uint32_t numClipDist = _mm_popcnt_u32(state.backendState.clipDistanceMask);
- desc.pUserClipBuffer = (float*)pArena->Alloc(numClipDist * 2 * sizeof(float));
- ProcessUserClipDist<2>(
- state.backendState, pa, primIndex, &desc.pTriBuffer[12], desc.pUserClipBuffer);
- }
-
- MacroTileMgr* pTileMgr = pDC->pTileMgr;
- for (uint32_t y = aMTTop[primIndex]; y <= aMTBottom[primIndex]; ++y)
- {
- for (uint32_t x = aMTLeft[primIndex]; x <= aMTRight[primIndex]; ++x)
- {
-#if KNOB_ENABLE_TOSS_POINTS
- if (!KNOB_TOSS_SETUP_TRIS)
-#endif
- {
- pTileMgr->enqueue(x, y, &work);
- }
- }
- }
-
- primMask &= ~(1 << primIndex);
- }
-
-endBinLines:
-
- RDTSC_END(pDC->pContext->pBucketMgr, FEBinLines, 1);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Bin SIMD lines to the backend.
-/// @param pDC - pointer to draw context.
-/// @param pa - The primitive assembly object.
-/// @param workerId - thread's worker id. Even thread has a unique id.
-/// @param tri - Contains line position data for SIMDs worth of points.
-/// @param primID - Primitive ID for each line.
-/// @param viewportIdx - Viewport Array Index for each line.
-template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void SIMDCALL BinLinesImpl(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- Vec4<SIMD_T> prim[3],
- uint32_t primMask,
- Integer<SIMD_T> const& primID,
- Integer<SIMD_T> const& viewportIdx,
- Integer<SIMD_T> const& rtIdx)
-{
- const API_STATE& state = GetApiState(pDC);
- const SWR_RASTSTATE& rastState = state.rastState;
- const SWR_FRONTEND_STATE& feState = state.frontendState;
-
- Float<SIMD_T> vRecipW[2] = {SIMD_T::set1_ps(1.0f), SIMD_T::set1_ps(1.0f)};
-
- if (!feState.vpTransformDisable)
- {
- // perspective divide
- vRecipW[0] = SIMD_T::div_ps(SIMD_T::set1_ps(1.0f), prim[0].w);
- vRecipW[1] = SIMD_T::div_ps(SIMD_T::set1_ps(1.0f), prim[1].w);
-
- prim[0].v[0] = SIMD_T::mul_ps(prim[0].v[0], vRecipW[0]);
- prim[1].v[0] = SIMD_T::mul_ps(prim[1].v[0], vRecipW[1]);
-
- prim[0].v[1] = SIMD_T::mul_ps(prim[0].v[1], vRecipW[0]);
- prim[1].v[1] = SIMD_T::mul_ps(prim[1].v[1], vRecipW[1]);
-
- prim[0].v[2] = SIMD_T::mul_ps(prim[0].v[2], vRecipW[0]);
- prim[1].v[2] = SIMD_T::mul_ps(prim[1].v[2], vRecipW[1]);
-
- // viewport transform to screen coords
- if (pa.viewportArrayActive)
- {
- viewportTransform<2>(prim, state.vpMatrices, viewportIdx);
- }
- else
- {
- viewportTransform<2>(prim, state.vpMatrices);
- }
- }
-
- // adjust for pixel center location
- Float<SIMD_T> offset = SwrPixelOffsets<SIMD_T>::GetOffset(rastState.pixelLocation);
-
- prim[0].x = SIMD_T::add_ps(prim[0].x, offset);
- prim[0].y = SIMD_T::add_ps(prim[0].y, offset);
-
- prim[1].x = SIMD_T::add_ps(prim[1].x, offset);
- prim[1].y = SIMD_T::add_ps(prim[1].y, offset);
-
- BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(
- pDC, pa, workerId, prim, vRecipW, primMask, primID, viewportIdx, rtIdx);
-}
-
-void BinLines(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simdvector prim[],
- uint32_t primMask,
- simdscalari const& primID,
- simdscalari const& viewportIdx,
- simdscalari const& rtIdx)
-{
- BinLinesImpl<SIMD256, KNOB_SIMD_WIDTH>(
- pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
-}
-
-#if USE_SIMD16_FRONTEND
-void SIMDCALL BinLines_simd16(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simd16vector prim[3],
- uint32_t primMask,
- simd16scalari const& primID,
- simd16scalari const& viewportIdx,
- simd16scalari const& rtIdx)
-{
- BinLinesImpl<SIMD512, KNOB_SIMD16_WIDTH>(
- pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
-}
-
-#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.h b/src/gallium/drivers/swr/rasterizer/core/binner.h
deleted file mode 100644
index 63be8f67cbf..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/binner.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file binner.h
- *
- * @brief Declaration for the macrotile binner
- *
- ******************************************************************************/
-#include "state.h"
-#include "conservativeRast.h"
-#include "utils.h"
-//////////////////////////////////////////////////////////////////////////
-/// @brief Offsets added to post-viewport vertex positions based on
-/// raster state.
-///
-/// Can't use templated variable because we must stick with C++11 features.
-/// Template variables were introduced with C++14
-template <typename SIMD_T>
-struct SwrPixelOffsets
-{
-public:
- INLINE static Float<SIMD_T> GetOffset(uint32_t loc)
- {
- SWR_ASSERT(loc <= 1);
-
- return SIMD_T::set1_ps(loc ? 0.5f : 0.0f);
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Convert the X,Y coords of a triangle to the requested Fixed
-/// Point precision from FP32.
-template <typename SIMD_T, typename PT = FixedPointTraits<Fixed_16_8>>
-INLINE Integer<SIMD_T> fpToFixedPointVertical(const Float<SIMD_T>& vIn)
-{
- return SIMD_T::cvtps_epi32(SIMD_T::mul_ps(vIn, SIMD_T::set1_ps(PT::ScaleT::value)));
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Helper function to set the X,Y coords of a triangle to the
-/// requested Fixed Point precision from FP32.
-/// @param tri: simdvector[3] of FP triangle verts
-/// @param vXi: fixed point X coords of tri verts
-/// @param vYi: fixed point Y coords of tri verts
-template <typename SIMD_T>
-INLINE static void
-FPToFixedPoint(const Vec4<SIMD_T>* const tri, Integer<SIMD_T> (&vXi)[3], Integer<SIMD_T> (&vYi)[3])
-{
- vXi[0] = fpToFixedPointVertical<SIMD_T>(tri[0].x);
- vYi[0] = fpToFixedPointVertical<SIMD_T>(tri[0].y);
- vXi[1] = fpToFixedPointVertical<SIMD_T>(tri[1].x);
- vYi[1] = fpToFixedPointVertical<SIMD_T>(tri[1].y);
- vXi[2] = fpToFixedPointVertical<SIMD_T>(tri[2].x);
- vYi[2] = fpToFixedPointVertical<SIMD_T>(tri[2].y);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Calculate bounding box for current triangle
-/// @tparam CT: ConservativeRastFETraits type
-/// @param vX: fixed point X position for triangle verts
-/// @param vY: fixed point Y position for triangle verts
-/// @param bbox: fixed point bbox
-/// *Note*: expects vX, vY to be in the correct precision for the type
-/// of rasterization. This avoids unnecessary FP->fixed conversions.
-template <typename SIMD_T, typename CT>
-INLINE void calcBoundingBoxIntVertical(const Integer<SIMD_T> (&vX)[3],
- const Integer<SIMD_T> (&vY)[3],
- SIMDBBOX_T<SIMD_T>& bbox)
-{
- Integer<SIMD_T> vMinX = vX[0];
-
- vMinX = SIMD_T::min_epi32(vMinX, vX[1]);
- vMinX = SIMD_T::min_epi32(vMinX, vX[2]);
-
- Integer<SIMD_T> vMaxX = vX[0];
-
- vMaxX = SIMD_T::max_epi32(vMaxX, vX[1]);
- vMaxX = SIMD_T::max_epi32(vMaxX, vX[2]);
-
- Integer<SIMD_T> vMinY = vY[0];
-
- vMinY = SIMD_T::min_epi32(vMinY, vY[1]);
- vMinY = SIMD_T::min_epi32(vMinY, vY[2]);
-
- Integer<SIMD_T> vMaxY = vY[0];
-
- vMaxY = SIMD_T::max_epi32(vMaxY, vY[1]);
- vMaxY = SIMD_T::max_epi32(vMaxY, vY[2]);
-
- if (CT::BoundingBoxOffsetT::value != 0)
- {
- /// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative
- /// rasterization expand bbox by 1/256; coverage will be correctly handled in the
- /// rasterizer.
-
- const Integer<SIMD_T> value = SIMD_T::set1_epi32(CT::BoundingBoxOffsetT::value);
-
- vMinX = SIMD_T::sub_epi32(vMinX, value);
- vMaxX = SIMD_T::add_epi32(vMaxX, value);
- vMinY = SIMD_T::sub_epi32(vMinY, value);
- vMaxY = SIMD_T::add_epi32(vMaxY, value);
- }
-
- bbox.xmin = vMinX;
- bbox.xmax = vMaxX;
- bbox.ymin = vMinY;
- bbox.ymax = vMaxY;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Gather scissor rect data based on per-prim viewport indices.
-/// @param pScissorsInFixedPoint - array of scissor rects in 16.8 fixed point.
-/// @param pViewportIndex - array of per-primitive viewport indexes.
-/// @param scisXmin - output vector of per-primitive scissor rect Xmin data.
-/// @param scisYmin - output vector of per-primitive scissor rect Ymin data.
-/// @param scisXmax - output vector of per-primitive scissor rect Xmax data.
-/// @param scisYmax - output vector of per-primitive scissor rect Ymax data.
-//
-/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
-static void GatherScissors(const SWR_RECT* pScissorsInFixedPoint,
- const uint32_t* pViewportIndex,
- simdscalari& scisXmin,
- simdscalari& scisYmin,
- simdscalari& scisXmax,
- simdscalari& scisYmax)
-{
- scisXmin = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].xmin,
- pScissorsInFixedPoint[pViewportIndex[6]].xmin,
- pScissorsInFixedPoint[pViewportIndex[5]].xmin,
- pScissorsInFixedPoint[pViewportIndex[4]].xmin,
- pScissorsInFixedPoint[pViewportIndex[3]].xmin,
- pScissorsInFixedPoint[pViewportIndex[2]].xmin,
- pScissorsInFixedPoint[pViewportIndex[1]].xmin,
- pScissorsInFixedPoint[pViewportIndex[0]].xmin);
- scisYmin = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].ymin,
- pScissorsInFixedPoint[pViewportIndex[6]].ymin,
- pScissorsInFixedPoint[pViewportIndex[5]].ymin,
- pScissorsInFixedPoint[pViewportIndex[4]].ymin,
- pScissorsInFixedPoint[pViewportIndex[3]].ymin,
- pScissorsInFixedPoint[pViewportIndex[2]].ymin,
- pScissorsInFixedPoint[pViewportIndex[1]].ymin,
- pScissorsInFixedPoint[pViewportIndex[0]].ymin);
- scisXmax = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].xmax,
- pScissorsInFixedPoint[pViewportIndex[6]].xmax,
- pScissorsInFixedPoint[pViewportIndex[5]].xmax,
- pScissorsInFixedPoint[pViewportIndex[4]].xmax,
- pScissorsInFixedPoint[pViewportIndex[3]].xmax,
- pScissorsInFixedPoint[pViewportIndex[2]].xmax,
- pScissorsInFixedPoint[pViewportIndex[1]].xmax,
- pScissorsInFixedPoint[pViewportIndex[0]].xmax);
- scisYmax = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].ymax,
- pScissorsInFixedPoint[pViewportIndex[6]].ymax,
- pScissorsInFixedPoint[pViewportIndex[5]].ymax,
- pScissorsInFixedPoint[pViewportIndex[4]].ymax,
- pScissorsInFixedPoint[pViewportIndex[3]].ymax,
- pScissorsInFixedPoint[pViewportIndex[2]].ymax,
- pScissorsInFixedPoint[pViewportIndex[1]].ymax,
- pScissorsInFixedPoint[pViewportIndex[0]].ymax);
-}
-
-static void GatherScissors(const SWR_RECT* pScissorsInFixedPoint,
- const uint32_t* pViewportIndex,
- simd16scalari& scisXmin,
- simd16scalari& scisYmin,
- simd16scalari& scisXmax,
- simd16scalari& scisYmax)
-{
- scisXmin = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].xmin,
- pScissorsInFixedPoint[pViewportIndex[14]].xmin,
- pScissorsInFixedPoint[pViewportIndex[13]].xmin,
- pScissorsInFixedPoint[pViewportIndex[12]].xmin,
- pScissorsInFixedPoint[pViewportIndex[11]].xmin,
- pScissorsInFixedPoint[pViewportIndex[10]].xmin,
- pScissorsInFixedPoint[pViewportIndex[9]].xmin,
- pScissorsInFixedPoint[pViewportIndex[8]].xmin,
- pScissorsInFixedPoint[pViewportIndex[7]].xmin,
- pScissorsInFixedPoint[pViewportIndex[6]].xmin,
- pScissorsInFixedPoint[pViewportIndex[5]].xmin,
- pScissorsInFixedPoint[pViewportIndex[4]].xmin,
- pScissorsInFixedPoint[pViewportIndex[3]].xmin,
- pScissorsInFixedPoint[pViewportIndex[2]].xmin,
- pScissorsInFixedPoint[pViewportIndex[1]].xmin,
- pScissorsInFixedPoint[pViewportIndex[0]].xmin);
-
- scisYmin = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].ymin,
- pScissorsInFixedPoint[pViewportIndex[14]].ymin,
- pScissorsInFixedPoint[pViewportIndex[13]].ymin,
- pScissorsInFixedPoint[pViewportIndex[12]].ymin,
- pScissorsInFixedPoint[pViewportIndex[11]].ymin,
- pScissorsInFixedPoint[pViewportIndex[10]].ymin,
- pScissorsInFixedPoint[pViewportIndex[9]].ymin,
- pScissorsInFixedPoint[pViewportIndex[8]].ymin,
- pScissorsInFixedPoint[pViewportIndex[7]].ymin,
- pScissorsInFixedPoint[pViewportIndex[6]].ymin,
- pScissorsInFixedPoint[pViewportIndex[5]].ymin,
- pScissorsInFixedPoint[pViewportIndex[4]].ymin,
- pScissorsInFixedPoint[pViewportIndex[3]].ymin,
- pScissorsInFixedPoint[pViewportIndex[2]].ymin,
- pScissorsInFixedPoint[pViewportIndex[1]].ymin,
- pScissorsInFixedPoint[pViewportIndex[0]].ymin);
-
- scisXmax = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].xmax,
- pScissorsInFixedPoint[pViewportIndex[14]].xmax,
- pScissorsInFixedPoint[pViewportIndex[13]].xmax,
- pScissorsInFixedPoint[pViewportIndex[12]].xmax,
- pScissorsInFixedPoint[pViewportIndex[11]].xmax,
- pScissorsInFixedPoint[pViewportIndex[10]].xmax,
- pScissorsInFixedPoint[pViewportIndex[9]].xmax,
- pScissorsInFixedPoint[pViewportIndex[8]].xmax,
- pScissorsInFixedPoint[pViewportIndex[7]].xmax,
- pScissorsInFixedPoint[pViewportIndex[6]].xmax,
- pScissorsInFixedPoint[pViewportIndex[5]].xmax,
- pScissorsInFixedPoint[pViewportIndex[4]].xmax,
- pScissorsInFixedPoint[pViewportIndex[3]].xmax,
- pScissorsInFixedPoint[pViewportIndex[2]].xmax,
- pScissorsInFixedPoint[pViewportIndex[1]].xmax,
- pScissorsInFixedPoint[pViewportIndex[0]].xmax);
-
- scisYmax = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].ymax,
- pScissorsInFixedPoint[pViewportIndex[14]].ymax,
- pScissorsInFixedPoint[pViewportIndex[13]].ymax,
- pScissorsInFixedPoint[pViewportIndex[12]].ymax,
- pScissorsInFixedPoint[pViewportIndex[11]].ymax,
- pScissorsInFixedPoint[pViewportIndex[10]].ymax,
- pScissorsInFixedPoint[pViewportIndex[9]].ymax,
- pScissorsInFixedPoint[pViewportIndex[8]].ymax,
- pScissorsInFixedPoint[pViewportIndex[7]].ymax,
- pScissorsInFixedPoint[pViewportIndex[6]].ymax,
- pScissorsInFixedPoint[pViewportIndex[5]].ymax,
- pScissorsInFixedPoint[pViewportIndex[4]].ymax,
- pScissorsInFixedPoint[pViewportIndex[3]].ymax,
- pScissorsInFixedPoint[pViewportIndex[2]].ymax,
- pScissorsInFixedPoint[pViewportIndex[1]].ymax,
- pScissorsInFixedPoint[pViewportIndex[0]].ymax);
-} \ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/core/blend.h b/src/gallium/drivers/swr/rasterizer/core/blend.h
deleted file mode 100644
index 7b2f77985f8..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/blend.h
+++ /dev/null
@@ -1,348 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file blend.cpp
- *
- * @brief Implementation for blending operations.
- *
- ******************************************************************************/
-#include "state.h"
-
-template <bool Color, bool Alpha>
-INLINE void GenerateBlendFactor(SWR_BLEND_FACTOR func,
- simdvector& constantColor,
- simdvector& src,
- simdvector& src1,
- simdvector& dst,
- simdvector& out)
-{
- simdvector result;
-
- switch (func)
- {
- case BLENDFACTOR_ZERO:
- result.x = _simd_setzero_ps();
- result.y = _simd_setzero_ps();
- result.z = _simd_setzero_ps();
- result.w = _simd_setzero_ps();
- break;
-
- case BLENDFACTOR_ONE:
- result.x = _simd_set1_ps(1.0);
- result.y = _simd_set1_ps(1.0);
- result.z = _simd_set1_ps(1.0);
- result.w = _simd_set1_ps(1.0);
- break;
-
- case BLENDFACTOR_SRC_COLOR:
- result = src;
- break;
-
- case BLENDFACTOR_DST_COLOR:
- result = dst;
- break;
-
- case BLENDFACTOR_INV_SRC_COLOR:
- result.x = _simd_sub_ps(_simd_set1_ps(1.0), src.x);
- result.y = _simd_sub_ps(_simd_set1_ps(1.0), src.y);
- result.z = _simd_sub_ps(_simd_set1_ps(1.0), src.z);
- result.w = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
- break;
-
- case BLENDFACTOR_INV_DST_COLOR:
- result.x = _simd_sub_ps(_simd_set1_ps(1.0), dst.x);
- result.y = _simd_sub_ps(_simd_set1_ps(1.0), dst.y);
- result.z = _simd_sub_ps(_simd_set1_ps(1.0), dst.z);
- result.w = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
- break;
-
- case BLENDFACTOR_SRC_ALPHA:
- result.x = src.w;
- result.y = src.w;
- result.z = src.w;
- result.w = src.w;
- break;
-
- case BLENDFACTOR_INV_SRC_ALPHA:
- {
- simdscalar oneMinusSrcA = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
- result.x = oneMinusSrcA;
- result.y = oneMinusSrcA;
- result.z = oneMinusSrcA;
- result.w = oneMinusSrcA;
- break;
- }
-
- case BLENDFACTOR_DST_ALPHA:
- result.x = dst.w;
- result.y = dst.w;
- result.z = dst.w;
- result.w = dst.w;
- break;
-
- case BLENDFACTOR_INV_DST_ALPHA:
- {
- simdscalar oneMinusDstA = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
- result.x = oneMinusDstA;
- result.y = oneMinusDstA;
- result.z = oneMinusDstA;
- result.w = oneMinusDstA;
- break;
- }
-
- case BLENDFACTOR_SRC_ALPHA_SATURATE:
- {
- simdscalar sat = _simd_min_ps(src.w, _simd_sub_ps(_simd_set1_ps(1.0), dst.w));
- result.x = sat;
- result.y = sat;
- result.z = sat;
- result.w = _simd_set1_ps(1.0);
- break;
- }
-
- case BLENDFACTOR_CONST_COLOR:
- result.x = constantColor[0];
- result.y = constantColor[1];
- result.z = constantColor[2];
- result.w = constantColor[3];
- break;
-
- case BLENDFACTOR_CONST_ALPHA:
- result.x = result.y = result.z = result.w = constantColor[3];
- break;
-
- case BLENDFACTOR_INV_CONST_COLOR:
- {
- result.x = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[0]);
- result.y = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[1]);
- result.z = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[2]);
- result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
- break;
- }
-
- case BLENDFACTOR_INV_CONST_ALPHA:
- {
- result.x = result.y = result.z = result.w =
- _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
- break;
- }
-
- case BLENDFACTOR_SRC1_COLOR:
- result.x = src1.x;
- result.y = src1.y;
- result.z = src1.z;
- result.w = src1.w;
- break;
-
- case BLENDFACTOR_SRC1_ALPHA:
- result.x = result.y = result.z = result.w = src1.w;
- break;
-
- case BLENDFACTOR_INV_SRC1_COLOR:
- result.x = _simd_sub_ps(_simd_set1_ps(1.0f), src1.x);
- result.y = _simd_sub_ps(_simd_set1_ps(1.0f), src1.y);
- result.z = _simd_sub_ps(_simd_set1_ps(1.0f), src1.z);
- result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
- break;
-
- case BLENDFACTOR_INV_SRC1_ALPHA:
- result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
- break;
-
- default:
- SWR_INVALID("Unimplemented blend factor: %d", func);
- }
-
- if (Color)
- {
- out.x = result.x;
- out.y = result.y;
- out.z = result.z;
- }
- if (Alpha)
- {
- out.w = result.w;
- }
-}
-
-template <bool Color, bool Alpha>
-INLINE void BlendFunc(SWR_BLEND_OP blendOp,
- simdvector& src,
- simdvector& srcFactor,
- simdvector& dst,
- simdvector& dstFactor,
- simdvector& out)
-{
- simdvector result;
-
- switch (blendOp)
- {
- case BLENDOP_ADD:
- result.x = _simd_fmadd_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x));
- result.y = _simd_fmadd_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y));
- result.z = _simd_fmadd_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z));
- result.w = _simd_fmadd_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w));
- break;
-
- case BLENDOP_SUBTRACT:
- result.x = _simd_fmsub_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x));
- result.y = _simd_fmsub_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y));
- result.z = _simd_fmsub_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z));
- result.w = _simd_fmsub_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w));
- break;
-
- case BLENDOP_REVSUBTRACT:
- result.x = _simd_fmsub_ps(dstFactor.x, dst.x, _simd_mul_ps(srcFactor.x, src.x));
- result.y = _simd_fmsub_ps(dstFactor.y, dst.y, _simd_mul_ps(srcFactor.y, src.y));
- result.z = _simd_fmsub_ps(dstFactor.z, dst.z, _simd_mul_ps(srcFactor.z, src.z));
- result.w = _simd_fmsub_ps(dstFactor.w, dst.w, _simd_mul_ps(srcFactor.w, src.w));
- break;
-
- case BLENDOP_MIN:
- result.x = _simd_min_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
- result.y = _simd_min_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
- result.z = _simd_min_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
- result.w = _simd_min_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
- break;
-
- case BLENDOP_MAX:
- result.x = _simd_max_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
- result.y = _simd_max_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
- result.z = _simd_max_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
- result.w = _simd_max_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
- break;
-
- default:
- SWR_INVALID("Unimplemented blend function: %d", blendOp);
- }
-
- if (Color)
- {
- out.x = result.x;
- out.y = result.y;
- out.z = result.z;
- }
- if (Alpha)
- {
- out.w = result.w;
- }
-}
-
-template <SWR_TYPE type>
-INLINE void Clamp(simdvector& src)
-{
- switch (type)
- {
- case SWR_TYPE_FLOAT:
- break;
-
- case SWR_TYPE_UNORM:
- src.x = _simd_max_ps(src.x, _simd_setzero_ps());
- src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f));
-
- src.y = _simd_max_ps(src.y, _simd_setzero_ps());
- src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f));
-
- src.z = _simd_max_ps(src.z, _simd_setzero_ps());
- src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f));
-
- src.w = _simd_max_ps(src.w, _simd_setzero_ps());
- src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f));
- break;
-
- case SWR_TYPE_SNORM:
- src.x = _simd_max_ps(src.x, _simd_set1_ps(-1.0f));
- src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f));
-
- src.y = _simd_max_ps(src.y, _simd_set1_ps(-1.0f));
- src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f));
-
- src.z = _simd_max_ps(src.z, _simd_set1_ps(-1.0f));
- src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f));
-
- src.w = _simd_max_ps(src.w, _simd_set1_ps(-1.0f));
- src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f));
- break;
-
- default:
- SWR_INVALID("Unimplemented clamp: %d", type);
- break;
- }
-}
-
-template <SWR_TYPE type>
-void Blend(const SWR_BLEND_STATE* pBlendState,
- const SWR_RENDER_TARGET_BLEND_STATE* pState,
- simdvector& src,
- simdvector& src1,
- uint8_t* pDst,
- simdvector& result)
-{
- // load render target
- simdvector dst;
- LoadSOA<KNOB_COLOR_HOT_TILE_FORMAT>(pDst, dst);
-
- simdvector constColor;
- constColor.x = _simd_broadcast_ss(&pBlendState->constantColor[0]);
- constColor.y = _simd_broadcast_ss(&pBlendState->constantColor[1]);
- constColor.z = _simd_broadcast_ss(&pBlendState->constantColor[2]);
- constColor.w = _simd_broadcast_ss(&pBlendState->constantColor[3]);
-
- // clamp src/dst/constant
- Clamp<type>(src);
- Clamp<type>(src1);
- Clamp<type>(dst);
- Clamp<type>(constColor);
-
- simdvector srcFactor, dstFactor;
- if (pBlendState->independentAlphaBlendEnable)
- {
- GenerateBlendFactor<true, false>(
- (SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
- GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->sourceAlphaBlendFactor,
- constColor,
- src,
- src1,
- dst,
- srcFactor);
-
- GenerateBlendFactor<true, false>(
- (SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
- GenerateBlendFactor<false, true>(
- (SWR_BLEND_FACTOR)pState->destAlphaBlendFactor, constColor, src, src1, dst, dstFactor);
-
- BlendFunc<true, false>(
- (SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
- BlendFunc<false, true>(
- (SWR_BLEND_OP)pState->alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
- }
- else
- {
- GenerateBlendFactor<true, true>(
- (SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
- GenerateBlendFactor<true, true>(
- (SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
-
- BlendFunc<true, true>(
- (SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
- }
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
deleted file mode 100644
index c399caf239b..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp
+++ /dev/null
@@ -1,336 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file clip.cpp
- *
- * @brief Implementation for clipping
- *
- ******************************************************************************/
-
-#include <assert.h>
-
-#include "common/os.h"
-#include "core/clip.h"
-
-float ComputeInterpFactor(float boundaryCoord0, float boundaryCoord1)
-{
- return (boundaryCoord0 / (boundaryCoord0 - boundaryCoord1));
-}
-
-template <SWR_CLIPCODES ClippingPlane>
-inline void intersect(
- int s, // index to first edge vertex v0 in pInPts.
- int p, // index to second edge vertex v1 in pInPts.
- const float* pInPts, // array of all the input positions.
- const float* pInAttribs, // array of all attributes for all vertex. All the attributes for each
- // vertex is contiguous.
- int numInAttribs, // number of attributes per vertex.
- int i, // output index.
- float* pOutPts, // array of output positions. We'll write our new intersection point at i*4.
- float* pOutAttribs) // array of output attributes. We'll write our new attributes at
- // i*numInAttribs.
-{
- float t;
-
- // Find the parameter of the intersection.
- // t = (v1.w - v1.x) / ((v2.x - v1.x) - (v2.w - v1.w)) for x = w (RIGHT) plane, etc.
- const float* v1 = &pInPts[s * 4];
- const float* v2 = &pInPts[p * 4];
-
- switch (ClippingPlane)
- {
- case FRUSTUM_LEFT:
- t = ComputeInterpFactor(v1[3] + v1[0], v2[3] + v2[0]);
- break;
- case FRUSTUM_RIGHT:
- t = ComputeInterpFactor(v1[3] - v1[0], v2[3] - v2[0]);
- break;
- case FRUSTUM_TOP:
- t = ComputeInterpFactor(v1[3] + v1[1], v2[3] + v2[1]);
- break;
- case FRUSTUM_BOTTOM:
- t = ComputeInterpFactor(v1[3] - v1[1], v2[3] - v2[1]);
- break;
- case FRUSTUM_NEAR:
- t = ComputeInterpFactor(v1[2], v2[2]);
- break;
- case FRUSTUM_FAR:
- t = ComputeInterpFactor(v1[3] - v1[2], v2[3] - v2[2]);
- break;
- default:
- SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
- };
-
- const float* a1 = &pInAttribs[s * numInAttribs];
- const float* a2 = &pInAttribs[p * numInAttribs];
-
- float* pOutP = &pOutPts[i * 4];
- float* pOutA = &pOutAttribs[i * numInAttribs];
-
- // Interpolate new position.
- for (int j = 0; j < 4; ++j)
- {
- pOutP[j] = v1[j] + (v2[j] - v1[j]) * t;
- }
-
- // Interpolate Attributes
- for (int attr = 0; attr < numInAttribs; ++attr)
- {
- pOutA[attr] = a1[attr] + (a2[attr] - a1[attr]) * t;
- }
-}
-
-// Checks whether vertex v lies inside clipping plane
-// in homogenous coords check -w < {x,y,z} < w;
-//
-template <SWR_CLIPCODES ClippingPlane>
-inline int inside(const float v[4])
-{
- switch (ClippingPlane)
- {
- case FRUSTUM_LEFT:
- return (v[0] >= -v[3]);
- case FRUSTUM_RIGHT:
- return (v[0] <= v[3]);
- case FRUSTUM_TOP:
- return (v[1] >= -v[3]);
- case FRUSTUM_BOTTOM:
- return (v[1] <= v[3]);
- case FRUSTUM_NEAR:
- return (v[2] >= 0.0f);
- case FRUSTUM_FAR:
- return (v[2] <= v[3]);
- default:
- SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
- return 0;
- }
-}
-
-// Clips a polygon in homogenous coordinates to a particular clipping plane.
-// Takes in vertices of the polygon (InPts) and the clipping plane
-// Puts the vertices of the clipped polygon in OutPts
-// Returns number of points in clipped polygon
-//
-template <SWR_CLIPCODES ClippingPlane>
-int ClipTriToPlane(const float* pInPts,
- int numInPts,
- const float* pInAttribs,
- int numInAttribs,
- float* pOutPts,
- float* pOutAttribs)
-{
- int i = 0; // index number of OutPts, # of vertices in OutPts = i div 4;
-
- for (int j = 0; j < numInPts; ++j)
- {
- int s = j;
- int p = (j + 1) % numInPts;
-
- int s_in = inside<ClippingPlane>(&pInPts[s * 4]);
- int p_in = inside<ClippingPlane>(&pInPts[p * 4]);
-
- // test if vertex is to be added to output vertices
- if (s_in != p_in) // edge crosses clipping plane
- {
- // find point of intersection
- intersect<ClippingPlane>(
- s, p, pInPts, pInAttribs, numInAttribs, i, pOutPts, pOutAttribs);
- i++;
- }
- if (p_in) // 2nd vertex is inside clipping volume, add it to output
- {
- // Copy 2nd vertex position of edge over to output.
- for (int k = 0; k < 4; ++k)
- {
- pOutPts[i * 4 + k] = pInPts[p * 4 + k];
- }
- // Copy 2nd vertex attributes of edge over to output.
- for (int attr = 0; attr < numInAttribs; ++attr)
- {
- pOutAttribs[i * numInAttribs + attr] = pInAttribs[p * numInAttribs + attr];
- }
- i++;
- }
- // edge does not cross clipping plane and vertex outside clipping volume
- // => do not add vertex
- }
- return i;
-}
-
-void ClipRectangles(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simdvector prims[],
- uint32_t primMask,
- simdscalari const& primId,
- simdscalari const& viewportIdx,
- simdscalari const& rtIdx)
-{
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipRectangles, pDC->drawId);
- Clipper<SIMD256, 3> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
- RDTSC_END(pDC->pContext->pBucketMgr, FEClipRectangles, 1);
-}
-
-void ClipTriangles(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simdvector prims[],
- uint32_t primMask,
- simdscalari const& primId,
- simdscalari const& viewportIdx,
- simdscalari const& rtIdx)
-{
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipTriangles, pDC->drawId);
- Clipper<SIMD256, 3> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
- RDTSC_END(pDC->pContext->pBucketMgr, FEClipTriangles, 1);
-}
-
-void ClipLines(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simdvector prims[],
- uint32_t primMask,
- simdscalari const& primId,
- simdscalari const& viewportIdx,
- simdscalari const& rtIdx)
-{
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipLines, pDC->drawId);
- Clipper<SIMD256, 2> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
- RDTSC_END(pDC->pContext->pBucketMgr, FEClipLines, 1);
-}
-
-void ClipPoints(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simdvector prims[],
- uint32_t primMask,
- simdscalari const& primId,
- simdscalari const& viewportIdx,
- simdscalari const& rtIdx)
-{
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipPoints, pDC->drawId);
- Clipper<SIMD256, 1> clipper(workerId, pDC);
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
- RDTSC_END(pDC->pContext->pBucketMgr, FEClipPoints, 1);
-}
-
-#if USE_SIMD16_FRONTEND
-void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simd16vector prims[],
- uint32_t primMask,
- simd16scalari const& primId,
- simd16scalari const& viewportIdx,
- simd16scalari const& rtIdx)
-{
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipRectangles, pDC->drawId);
-
- enum
- {
- VERTS_PER_PRIM = 3
- };
-
- Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
-
- pa.useAlternateOffset = false;
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
-
- RDTSC_END(pDC->pContext->pBucketMgr, FEClipRectangles, 1);
-}
-
-void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simd16vector prims[],
- uint32_t primMask,
- simd16scalari const& primId,
- simd16scalari const& viewportIdx,
- simd16scalari const& rtIdx)
-{
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipTriangles, pDC->drawId);
-
- enum
- {
- VERTS_PER_PRIM = 3
- };
-
- Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
-
- pa.useAlternateOffset = false;
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
-
- RDTSC_END(pDC->pContext->pBucketMgr, FEClipTriangles, 1);
-}
-
-void SIMDCALL ClipLines_simd16(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simd16vector prims[],
- uint32_t primMask,
- simd16scalari const& primId,
- simd16scalari const& viewportIdx,
- simd16scalari const& rtIdx)
-{
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipLines, pDC->drawId);
-
- enum
- {
- VERTS_PER_PRIM = 2
- };
-
- Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
-
- pa.useAlternateOffset = false;
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
-
- RDTSC_END(pDC->pContext->pBucketMgr, FEClipLines, 1);
-}
-
-void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simd16vector prims[],
- uint32_t primMask,
- simd16scalari const& primId,
- simd16scalari const& viewportIdx,
- simd16scalari const& rtIdx)
-{
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipPoints, pDC->drawId);
-
- enum
- {
- VERTS_PER_PRIM = 1
- };
-
- Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
-
- pa.useAlternateOffset = false;
- clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
-
- RDTSC_END(pDC->pContext->pBucketMgr, FEClipPoints, 1);
-}
-
-#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
deleted file mode 100644
index d7186ca10b1..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ /dev/null
@@ -1,1361 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file clip.h
- *
- * @brief Definitions for clipping
- *
- ******************************************************************************/
-#pragma once
-
-#include "common/simdintrin.h"
-#include "core/context.h"
-#include "core/pa.h"
-#include "rdtsc_core.h"
-
-enum SWR_CLIPCODES
-{
-// Shift clip codes out of the mantissa to prevent denormalized values when used in float compare.
-// Guardband is able to use a single high-bit with 4 separate LSBs, because it computes a union,
-// rather than intersection, of clipcodes.
-#define CLIPCODE_SHIFT 23
- FRUSTUM_LEFT = (0x01 << CLIPCODE_SHIFT),
- FRUSTUM_TOP = (0x02 << CLIPCODE_SHIFT),
- FRUSTUM_RIGHT = (0x04 << CLIPCODE_SHIFT),
- FRUSTUM_BOTTOM = (0x08 << CLIPCODE_SHIFT),
-
- FRUSTUM_NEAR = (0x10 << CLIPCODE_SHIFT),
- FRUSTUM_FAR = (0x20 << CLIPCODE_SHIFT),
-
- NEGW = (0x40 << CLIPCODE_SHIFT),
-
- GUARDBAND_LEFT = (0x80 << CLIPCODE_SHIFT | 0x1),
- GUARDBAND_TOP = (0x80 << CLIPCODE_SHIFT | 0x2),
- GUARDBAND_RIGHT = (0x80 << CLIPCODE_SHIFT | 0x4),
- GUARDBAND_BOTTOM = (0x80 << CLIPCODE_SHIFT | 0x8)
-};
-
-#define GUARDBAND_CLIP_MASK \
- (FRUSTUM_NEAR | FRUSTUM_FAR | GUARDBAND_LEFT | GUARDBAND_TOP | GUARDBAND_RIGHT | \
- GUARDBAND_BOTTOM | NEGW)
-#define FRUSTUM_CLIP_MASK \
- (FRUSTUM_NEAR | FRUSTUM_FAR | FRUSTUM_LEFT | FRUSTUM_RIGHT | FRUSTUM_TOP | FRUSTUM_BOTTOM)
-
-template <typename SIMD_T>
-void ComputeClipCodes(const API_STATE& state,
- const Vec4<SIMD_T>& vertex,
- Float<SIMD_T>& clipCodes,
- Integer<SIMD_T> const& viewportIndexes)
-{
- clipCodes = SIMD_T::setzero_ps();
-
- // -w
- Float<SIMD_T> vNegW = SIMD_T::mul_ps(vertex.w, SIMD_T::set1_ps(-1.0f));
-
- // FRUSTUM_LEFT
- Float<SIMD_T> vRes = SIMD_T::cmplt_ps(vertex.x, vNegW);
- clipCodes = SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_LEFT)));
-
- // FRUSTUM_TOP
- vRes = SIMD_T::cmplt_ps(vertex.y, vNegW);
- clipCodes = SIMD_T::or_ps(
- clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_TOP))));
-
- // FRUSTUM_RIGHT
- vRes = SIMD_T::cmpgt_ps(vertex.x, vertex.w);
- clipCodes = SIMD_T::or_ps(
- clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_RIGHT))));
-
- // FRUSTUM_BOTTOM
- vRes = SIMD_T::cmpgt_ps(vertex.y, vertex.w);
- clipCodes = SIMD_T::or_ps(
- clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_BOTTOM))));
-
- if (state.rastState.depthClipEnable)
- {
- // FRUSTUM_NEAR
- // DX clips depth [0..w], GL clips [-w..w]
- if (state.rastState.clipHalfZ)
- {
- vRes = SIMD_T::cmplt_ps(vertex.z, SIMD_T::setzero_ps());
- }
- else
- {
- vRes = SIMD_T::cmplt_ps(vertex.z, vNegW);
- }
- clipCodes = SIMD_T::or_ps(
- clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_NEAR))));
-
- // FRUSTUM_FAR
- vRes = SIMD_T::cmpgt_ps(vertex.z, vertex.w);
- clipCodes = SIMD_T::or_ps(
- clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_FAR))));
- }
-
- // NEGW
- vRes = SIMD_T::cmple_ps(vertex.w, SIMD_T::setzero_ps());
- clipCodes =
- SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(NEGW))));
-
- // GUARDBAND_LEFT
- Float<SIMD_T> gbMult = SIMD_T::mul_ps(vNegW,
- SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(
- &state.gbState.left[0], viewportIndexes));
- vRes = SIMD_T::cmplt_ps(vertex.x, gbMult);
- clipCodes = SIMD_T::or_ps(
- clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_LEFT))));
-
- // GUARDBAND_TOP
- gbMult = SIMD_T::mul_ps(vNegW,
- SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(
- &state.gbState.top[0], viewportIndexes));
- vRes = SIMD_T::cmplt_ps(vertex.y, gbMult);
- clipCodes = SIMD_T::or_ps(
- clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_TOP))));
-
- // GUARDBAND_RIGHT
- gbMult = SIMD_T::mul_ps(vertex.w,
- SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(
- &state.gbState.right[0], viewportIndexes));
- vRes = SIMD_T::cmpgt_ps(vertex.x, gbMult);
- clipCodes = SIMD_T::or_ps(
- clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_RIGHT))));
-
- // GUARDBAND_BOTTOM
- gbMult = SIMD_T::mul_ps(vertex.w,
- SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(
- &state.gbState.bottom[0], viewportIndexes));
- vRes = SIMD_T::cmpgt_ps(vertex.y, gbMult);
- clipCodes = SIMD_T::or_ps(
- clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_BOTTOM))));
-}
-
-template <typename SIMD_T>
-struct BinnerChooser
-{
-};
-
-template <>
-struct BinnerChooser<SIMD256>
-{
- PFN_PROCESS_PRIMS pfnBinFunc;
-
- BinnerChooser(uint32_t numVertsPerPrim, uint32_t conservativeRast)
- :
- pfnBinFunc(nullptr)
- {
- if (numVertsPerPrim == 3)
- {
- pfnBinFunc = GetBinTrianglesFunc(conservativeRast > 0);
-
- }
- else if (numVertsPerPrim == 2)
- {
- pfnBinFunc = BinLines;
- }
- else
- {
- SWR_ASSERT(0 && "Unexpected points in clipper.");
- }
- }
-
- BinnerChooser(PRIMITIVE_TOPOLOGY topology, uint32_t conservativeRast)
- :
- pfnBinFunc(nullptr)
- {
- switch (topology)
- {
- case TOP_POINT_LIST:
- pfnBinFunc = BinPoints;
- break;
- case TOP_LINE_LIST:
- case TOP_LINE_STRIP:
- case TOP_LINE_LOOP:
- case TOP_LINE_LIST_ADJ:
- case TOP_LISTSTRIP_ADJ:
- pfnBinFunc = BinLines;
- break;
- default:
- pfnBinFunc = GetBinTrianglesFunc(conservativeRast > 0);
- break;
- };
- }
-
- void BinFunc(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- SIMD256::Vec4 prims[],
- uint32_t primMask,
- SIMD256::Integer const& primID,
- SIMD256::Integer& viewportIdx,
- SIMD256::Integer& rtIdx)
- {
- SWR_ASSERT(pfnBinFunc != nullptr);
-
- pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx, rtIdx);
- }
-};
-
-#if USE_SIMD16_FRONTEND
-template <>
-struct BinnerChooser<SIMD512>
-{
- PFN_PROCESS_PRIMS_SIMD16 pfnBinFunc;
-
- BinnerChooser(uint32_t numVertsPerPrim, uint32_t conservativeRast)
- :
- pfnBinFunc(nullptr)
- {
- if (numVertsPerPrim == 3)
- {
- pfnBinFunc = GetBinTrianglesFunc_simd16(conservativeRast > 0);
-
- }
- else if (numVertsPerPrim == 2)
- {
- pfnBinFunc = BinLines_simd16;
- }
- else
- {
- SWR_ASSERT(0 && "Unexpected points in clipper.");
- }
- }
-
- BinnerChooser(PRIMITIVE_TOPOLOGY topology, uint32_t conservativeRast)
- :
- pfnBinFunc(nullptr)
- {
- switch (topology)
- {
- case TOP_POINT_LIST:
- pfnBinFunc = BinPoints_simd16;
- break;
- case TOP_LINE_LIST:
- case TOP_LINE_STRIP:
- case TOP_LINE_LOOP:
- case TOP_LINE_LIST_ADJ:
- case TOP_LISTSTRIP_ADJ:
- pfnBinFunc = BinLines_simd16;
- break;
- default:
- pfnBinFunc = GetBinTrianglesFunc_simd16(conservativeRast > 0);
- break;
- };
- }
-
- void BinFunc(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- SIMD512::Vec4 prims[],
- uint32_t primMask,
- SIMD512::Integer const& primID,
- SIMD512::Integer& viewportIdx,
- SIMD512::Integer& rtIdx)
- {
- SWR_ASSERT(pfnBinFunc != nullptr);
-
- pfnBinFunc(pDC, pa, workerId, prims, primMask, primID, viewportIdx, rtIdx);
- }
-};
-
-#endif
-template <typename SIMD_T>
-struct SimdHelper
-{
-};
-
-template <>
-struct SimdHelper<SIMD256>
-{
- static SIMD256::Float insert_lo_ps(SIMD256::Float a) { return a; }
-
- static SIMD256::Mask cmpeq_ps_mask(SIMD256::Float a, SIMD256::Float b)
- {
- return SIMD256::movemask_ps(SIMD256::cmpeq_ps(a, b));
- }
-};
-
-#if USE_SIMD16_FRONTEND
-template <>
-struct SimdHelper<SIMD512>
-{
- static SIMD512::Float insert_lo_ps(SIMD256::Float a)
- {
- return SIMD512::insert_ps<0>(SIMD512::setzero_ps(), a);
- }
-
- static SIMD512::Mask cmpeq_ps_mask(SIMD512::Float a, SIMD512::Float b)
- {
- return SIMD512::cmp_ps_mask<SIMD16::CompareType::EQ_OQ>(a, b);
- }
-};
-#endif
-
-template <typename SIMD_T, uint32_t NumVertsPerPrimT>
-class Clipper
-{
-public:
- INLINE Clipper(uint32_t in_workerId, DRAW_CONTEXT* in_pDC) :
- workerId(in_workerId), pDC(in_pDC), state(GetApiState(in_pDC))
- {
- static_assert(NumVertsPerPrimT >= 1 && NumVertsPerPrimT <= 3, "Invalid NumVertsPerPrim");
- THREAD_DATA &thread_data = in_pDC->pContext->threadPool.pThreadData[workerId];
-
- if (thread_data.clipperData == nullptr)
- {
- // 7 vertex temp data
- // 7 post-clipped vertices
- // 2 transposed verts for binning
- size_t alloc_size = sizeof(SIMDVERTEX_T<SIMD_T>) * (7 + 7 + 2);
- thread_data.clipperData = AlignedMalloc(alloc_size, KNOB_SIMD16_BYTES);
- }
- SWR_ASSERT(thread_data.clipperData);
-
- this->clippedVerts = (SIMDVERTEX_T<SIMD_T>*)thread_data.clipperData;
- this->tmpVerts = this->clippedVerts + 7;
- this->transposedVerts = this->tmpVerts + 7;
- }
-
- void ComputeClipCodes(Vec4<SIMD_T> vertex[], const Integer<SIMD_T>& viewportIndexes)
- {
- for (uint32_t i = 0; i < NumVertsPerPrimT; ++i)
- {
- ::ComputeClipCodes<SIMD_T>(state, vertex[i], clipCodes[i], viewportIndexes);
- }
- }
-
- Float<SIMD_T> ComputeClipCodeIntersection()
- {
- Float<SIMD_T> result = clipCodes[0];
-
- for (uint32_t i = 1; i < NumVertsPerPrimT; ++i)
- {
- result = SIMD_T::and_ps(result, clipCodes[i]);
- }
-
- return result;
- }
-
- Float<SIMD_T> ComputeClipCodeUnion()
- {
- Float<SIMD_T> result = clipCodes[0];
-
- for (uint32_t i = 1; i < NumVertsPerPrimT; ++i)
- {
- result = SIMD_T::or_ps(result, clipCodes[i]);
- }
-
- return result;
- }
-
- int ComputeClipMask()
- {
- Float<SIMD_T> clipUnion = ComputeClipCodeUnion();
-
- clipUnion =
- SIMD_T::and_ps(clipUnion, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_CLIP_MASK)));
-
- return SIMD_T::movemask_ps(SIMD_T::cmpneq_ps(clipUnion, SIMD_T::setzero_ps()));
- }
-
- // clipper is responsible for culling any prims with NAN coordinates
- int ComputeNaNMask(Vec4<SIMD_T> prim[])
- {
- Float<SIMD_T> vNanMask = SIMD_T::setzero_ps();
-
- for (uint32_t e = 0; e < NumVertsPerPrimT; ++e)
- {
- Float<SIMD_T> vNan01 =
- SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(prim[e].v[0], prim[e].v[1]);
- vNanMask = SIMD_T::or_ps(vNanMask, vNan01);
-
- Float<SIMD_T> vNan23 =
- SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(prim[e].v[2], prim[e].v[3]);
- vNanMask = SIMD_T::or_ps(vNanMask, vNan23);
- }
-
- return SIMD_T::movemask_ps(vNanMask);
- }
-
- int ComputeUserClipCullMask(PA_STATE& pa, Vec4<SIMD_T> prim[])
- {
- uint8_t cullMask = state.backendState.cullDistanceMask;
- uint32_t vertexClipCullOffset = state.backendState.vertexClipCullOffset;
-
- Float<SIMD_T> vClipCullMask = SIMD_T::setzero_ps();
-
- Vec4<SIMD_T> vClipCullDistLo[3];
- Vec4<SIMD_T> vClipCullDistHi[3];
-
- pa.Assemble(vertexClipCullOffset, vClipCullDistLo);
- pa.Assemble(vertexClipCullOffset + 1, vClipCullDistHi);
-
- unsigned long index;
- while (_BitScanForward(&index, cullMask))
- {
- cullMask &= ~(1 << index);
- uint32_t slot = index >> 2;
- uint32_t component = index & 0x3;
-
- Float<SIMD_T> vCullMaskElem = SIMD_T::set1_ps(-1.0f);
- for (uint32_t e = 0; e < NumVertsPerPrimT; ++e)
- {
- Float<SIMD_T> vCullComp;
- if (slot == 0)
- {
- vCullComp = vClipCullDistLo[e][component];
- }
- else
- {
- vCullComp = vClipCullDistHi[e][component];
- }
-
- // cull if cull distance < 0 || NAN
- Float<SIMD_T> vCull = SIMD_T::template cmp_ps<SIMD_T::CompareType::NLE_UQ>(
- SIMD_T::setzero_ps(), vCullComp);
- vCullMaskElem = SIMD_T::and_ps(vCullMaskElem, vCull);
- }
- vClipCullMask = SIMD_T::or_ps(vClipCullMask, vCullMaskElem);
- }
-
- // clipper should also discard any primitive with NAN clip distance
- uint8_t clipMask = state.backendState.clipDistanceMask;
- while (_BitScanForward(&index, clipMask))
- {
- clipMask &= ~(1 << index);
- uint32_t slot = index >> 2;
- uint32_t component = index & 0x3;
-
- Float<SIMD_T> vCullMaskElem = SIMD_T::set1_ps(-1.0f);
- for (uint32_t e = 0; e < NumVertsPerPrimT; ++e)
- {
- Float<SIMD_T> vClipComp;
- if (slot == 0)
- {
- vClipComp = vClipCullDistLo[e][component];
- }
- else
- {
- vClipComp = vClipCullDistHi[e][component];
- }
-
- Float<SIMD_T> vClip =
- SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(vClipComp, vClipComp);
- Float<SIMD_T> vCull = SIMD_T::template cmp_ps<SIMD_T::CompareType::NLE_UQ>(
- SIMD_T::setzero_ps(), vClipComp);
- vCullMaskElem = SIMD_T::and_ps(vCullMaskElem, vCull);
- vClipCullMask = SIMD_T::or_ps(vClipCullMask, vClip);
- }
- vClipCullMask = SIMD_T::or_ps(vClipCullMask, vCullMaskElem);
- }
-
- return SIMD_T::movemask_ps(vClipCullMask);
- }
-
- void ClipSimd(const Vec4<SIMD_T> prim[],
- const Float<SIMD_T>& vPrimMask,
- const Float<SIMD_T>& vClipMask,
- PA_STATE& pa,
- const Integer<SIMD_T>& vPrimId,
- const Integer<SIMD_T>& vViewportIdx,
- const Integer<SIMD_T>& vRtIdx)
- {
- // input/output vertex store for clipper
- SIMDVERTEX_T<SIMD_T>* vertices = this->clippedVerts;
-
- uint32_t constantInterpMask = state.backendState.constantInterpolationMask;
- uint32_t provokingVertex = 0;
- if (pa.binTopology == TOP_TRIANGLE_FAN)
- {
- provokingVertex = state.frontendState.provokingVertex.triFan;
- }
- ///@todo: line topology for wireframe?
-
- // assemble pos
- Vec4<SIMD_T> tmpVector[NumVertsPerPrimT];
- for (uint32_t i = 0; i < NumVertsPerPrimT; ++i)
- {
- vertices[i].attrib[VERTEX_POSITION_SLOT] = prim[i];
- }
-
- // assemble attribs
- const SWR_BACKEND_STATE& backendState = state.backendState;
-
- int32_t maxSlot = -1;
- for (uint32_t slot = 0; slot < backendState.numAttributes; ++slot)
- {
- // Compute absolute attrib slot in vertex array
- uint32_t mapSlot =
- backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot;
- maxSlot = std::max<int32_t>(maxSlot, mapSlot);
- uint32_t inputSlot = backendState.vertexAttribOffset + mapSlot;
-
- pa.Assemble(inputSlot, tmpVector);
-
- // if constant interpolation enabled for this attribute, assign the provoking
- // vertex values to all edges
- if (CheckBit(constantInterpMask, slot))
- {
- for (uint32_t i = 0; i < NumVertsPerPrimT; ++i)
- {
- vertices[i].attrib[inputSlot] = tmpVector[provokingVertex];
- }
- }
- else
- {
- for (uint32_t i = 0; i < NumVertsPerPrimT; ++i)
- {
- vertices[i].attrib[inputSlot] = tmpVector[i];
- }
- }
- }
-
- // assemble user clip distances if enabled
- uint32_t vertexClipCullSlot = state.backendState.vertexClipCullOffset;
- if (state.backendState.clipDistanceMask & 0xf)
- {
- pa.Assemble(vertexClipCullSlot, tmpVector);
- for (uint32_t i = 0; i < NumVertsPerPrimT; ++i)
- {
- vertices[i].attrib[vertexClipCullSlot] = tmpVector[i];
- }
- }
-
- if (state.backendState.clipDistanceMask & 0xf0)
- {
- pa.Assemble(vertexClipCullSlot + 1, tmpVector);
- for (uint32_t i = 0; i < NumVertsPerPrimT; ++i)
- {
- vertices[i].attrib[vertexClipCullSlot + 1] = tmpVector[i];
- }
- }
-
- uint32_t numAttribs = maxSlot + 1;
-
- Integer<SIMD_T> vNumClippedVerts =
- ClipPrims((float*)&vertices[0], vPrimMask, vClipMask, numAttribs);
-
- BinnerChooser<SIMD_T> binner(NumVertsPerPrimT,
- pa.pDC->pState->state.rastState.conservativeRast);
-
- // set up new PA for binning clipped primitives
- PRIMITIVE_TOPOLOGY clipTopology = TOP_UNKNOWN;
- if (NumVertsPerPrimT == 3)
- {
- clipTopology = TOP_TRIANGLE_FAN;
-
- // so that the binner knows to bloat wide points later
- if (pa.binTopology == TOP_POINT_LIST)
- {
- clipTopology = TOP_POINT_LIST;
- }
- else if (pa.binTopology == TOP_RECT_LIST)
- {
- clipTopology = TOP_RECT_LIST;
- }
- }
- else if (NumVertsPerPrimT == 2)
- {
- clipTopology = TOP_LINE_LIST;
- }
- else
- {
- SWR_ASSERT(0 && "Unexpected points in clipper.");
- }
-
- const uint32_t* pVertexCount = reinterpret_cast<const uint32_t*>(&vNumClippedVerts);
- const uint32_t* pPrimitiveId = reinterpret_cast<const uint32_t*>(&vPrimId);
- const uint32_t* pViewportIdx = reinterpret_cast<const uint32_t*>(&vViewportIdx);
- const uint32_t* pRtIdx = reinterpret_cast<const uint32_t*>(&vRtIdx);
-
- const SIMD256::Integer vOffsets =
- SIMD256::set_epi32(0 * sizeof(SIMDVERTEX_T<SIMD_T>), // unused lane
- 6 * sizeof(SIMDVERTEX_T<SIMD_T>),
- 5 * sizeof(SIMDVERTEX_T<SIMD_T>),
- 4 * sizeof(SIMDVERTEX_T<SIMD_T>),
- 3 * sizeof(SIMDVERTEX_T<SIMD_T>),
- 2 * sizeof(SIMDVERTEX_T<SIMD_T>),
- 1 * sizeof(SIMDVERTEX_T<SIMD_T>),
- 0 * sizeof(SIMDVERTEX_T<SIMD_T>));
-
- // only need to gather 7 verts
- // @todo dynamic mask based on actual # of verts generated per lane
- const SIMD256::Float vMask = SIMD256::set_ps(0, -1, -1, -1, -1, -1, -1, -1);
-
- uint32_t numClippedPrims = 0;
-
- // transpose clipper output so that each lane's vertices are in SIMD order
- // set aside space for 2 vertices, as the PA will try to read up to 16 verts
- // for triangle fan
- SIMDVERTEX_T<SIMD_T>* transposedPrims = this->transposedVerts;
-
- uint32_t numInputPrims = pa.NumPrims();
- for (uint32_t inputPrim = 0; inputPrim < numInputPrims; ++inputPrim)
- {
- uint32_t numEmittedVerts = pVertexCount[inputPrim];
- if (numEmittedVerts < NumVertsPerPrimT)
- {
- continue;
- }
- SWR_ASSERT(numEmittedVerts <= 7, "Unexpected vertex count from clipper.");
-
- uint32_t numEmittedPrims = GetNumPrims(clipTopology, numEmittedVerts);
- SWR_ASSERT(numEmittedPrims <= 7, "Unexpected primitive count from clipper.");
-
- numClippedPrims += numEmittedPrims;
-
- // tranpose clipper output so that each lane's vertices are in SIMD order
- // set aside space for 2 vertices, as the PA will try to read up to 16 verts
- // for triangle fan
-
- // transpose pos
- float const* pBase =
- reinterpret_cast<float const*>(&vertices[0].attrib[VERTEX_POSITION_SLOT]) +
- inputPrim;
-
- for (uint32_t c = 0; c < 4; ++c)
- {
- SIMD256::Float temp =
- SIMD256::mask_i32gather_ps(SIMD256::setzero_ps(), pBase, vOffsets, vMask);
- transposedPrims[0].attrib[VERTEX_POSITION_SLOT][c] =
- SimdHelper<SIMD_T>::insert_lo_ps(temp);
- pBase = PtrAdd(pBase, sizeof(Float<SIMD_T>));
- }
-
- // transpose attribs
- pBase = reinterpret_cast<float const*>(
- &vertices[0].attrib[backendState.vertexAttribOffset]) +
- inputPrim;
-
- for (uint32_t attrib = 0; attrib < numAttribs; ++attrib)
- {
- uint32_t attribSlot = backendState.vertexAttribOffset + attrib;
-
- for (uint32_t c = 0; c < 4; ++c)
- {
- SIMD256::Float temp =
- SIMD256::mask_i32gather_ps(SIMD256::setzero_ps(), pBase, vOffsets, vMask);
- transposedPrims[0].attrib[attribSlot][c] =
- SimdHelper<SIMD_T>::insert_lo_ps(temp);
- pBase = PtrAdd(pBase, sizeof(Float<SIMD_T>));
- }
- }
-
- // transpose user clip distances if enabled
- uint32_t vertexClipCullSlot = backendState.vertexClipCullOffset;
- if (state.backendState.clipDistanceMask & 0x0f)
- {
- pBase = reinterpret_cast<float const*>(&vertices[0].attrib[vertexClipCullSlot]) +
- inputPrim;
-
- for (uint32_t c = 0; c < 4; ++c)
- {
- SIMD256::Float temp =
- SIMD256::mask_i32gather_ps(SIMD256::setzero_ps(), pBase, vOffsets, vMask);
- transposedPrims[0].attrib[vertexClipCullSlot][c] =
- SimdHelper<SIMD_T>::insert_lo_ps(temp);
- pBase = PtrAdd(pBase, sizeof(Float<SIMD_T>));
- }
- }
-
- if (state.backendState.clipDistanceMask & 0xf0)
- {
- pBase =
- reinterpret_cast<float const*>(&vertices[0].attrib[vertexClipCullSlot + 1]) +
- inputPrim;
-
- for (uint32_t c = 0; c < 4; ++c)
- {
- SIMD256::Float temp =
- SIMD256::mask_i32gather_ps(SIMD256::setzero_ps(), pBase, vOffsets, vMask);
- transposedPrims[0].attrib[vertexClipCullSlot + 1][c] =
- SimdHelper<SIMD_T>::insert_lo_ps(temp);
- pBase = PtrAdd(pBase, sizeof(Float<SIMD_T>));
- }
- }
-
- PA_STATE_OPT clipPA(pDC,
- numEmittedPrims,
- reinterpret_cast<uint8_t*>(&transposedPrims[0]),
- numEmittedVerts,
- SWR_VTX_NUM_SLOTS,
- true,
- NumVertsPerPrimT,
- clipTopology);
- clipPA.viewportArrayActive = pa.viewportArrayActive;
- clipPA.rtArrayActive = pa.rtArrayActive;
-
- static const uint32_t primMaskMap[] = {0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f};
-
- const uint32_t primMask = primMaskMap[numEmittedPrims];
-
- const Integer<SIMD_T> primID = SIMD_T::set1_epi32(pPrimitiveId[inputPrim]);
- const Integer<SIMD_T> viewportIdx = SIMD_T::set1_epi32(pViewportIdx[inputPrim]);
- const Integer<SIMD_T> rtIdx = SIMD_T::set1_epi32(pRtIdx[inputPrim]);
-
- while (clipPA.GetNextStreamOutput())
- {
- do
- {
- Vec4<SIMD_T> attrib[NumVertsPerPrimT];
-
- bool assemble = clipPA.Assemble(VERTEX_POSITION_SLOT, attrib);
-
- if (assemble)
- {
- binner.pfnBinFunc(
- pDC, clipPA, workerId, attrib, primMask, primID, viewportIdx, rtIdx);
- }
-
- } while (clipPA.NextPrim());
- }
- }
-
- // update global pipeline stat
- UPDATE_STAT_FE(CPrimitives, numClippedPrims);
- }
-
- void ExecuteStage(PA_STATE& pa,
- Vec4<SIMD_T> prim[],
- uint32_t primMask,
- Integer<SIMD_T> const& primId,
- Integer<SIMD_T> const& viewportIdx,
- Integer<SIMD_T> const& rtIdx)
- {
- SWR_ASSERT(pa.pDC != nullptr);
-
- BinnerChooser<SIMD_T> binner(pa.binTopology,
- pa.pDC->pState->state.rastState.conservativeRast);
-
- // update clipper invocations pipeline stat
- uint32_t numInvoc = _mm_popcnt_u32(primMask);
- UPDATE_STAT_FE(CInvocations, numInvoc);
-
- ComputeClipCodes(prim, viewportIdx);
-
- // cull prims with NAN coords
- primMask &= ~ComputeNaNMask(prim);
-
- // user cull distance cull
- if (state.backendState.cullDistanceMask | state.backendState.clipDistanceMask)
- {
- primMask &= ~ComputeUserClipCullMask(pa, prim);
- }
-
- Float<SIMD_T> clipIntersection = ComputeClipCodeIntersection();
- // Mask out non-frustum codes
- clipIntersection = SIMD_T::and_ps(clipIntersection,
- SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_CLIP_MASK)));
-
- // cull prims outside view frustum
- int validMask =
- primMask & SimdHelper<SIMD_T>::cmpeq_ps_mask(clipIntersection, SIMD_T::setzero_ps());
-
- // skip clipping for points
- uint32_t clipMask = 0;
- if (NumVertsPerPrimT != 1)
- {
- clipMask = validMask & ComputeClipMask();
- }
-
- AR_EVENT(ClipInfoEvent(numInvoc, validMask, clipMask));
-
- if (clipMask)
- {
- RDTSC_BEGIN(pa.pDC->pContext->pBucketMgr, FEGuardbandClip, pa.pDC->drawId);
- // we have to clip tris, execute the clipper, which will also
- // call the binner
- ClipSimd(prim,
- SIMD_T::vmask_ps(validMask),
- SIMD_T::vmask_ps(clipMask),
- pa,
- primId,
- viewportIdx,
- rtIdx);
- RDTSC_END(pa.pDC->pContext->pBucketMgr, FEGuardbandClip, 1);
- }
- else if (validMask)
- {
- // update CPrimitives pipeline state
- UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
-
- // forward valid prims directly to binner
- binner.pfnBinFunc(
- this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx, rtIdx);
- }
- }
-
-private:
- Float<SIMD_T> ComputeInterpFactor(Float<SIMD_T> const& boundaryCoord0,
- Float<SIMD_T> const& boundaryCoord1)
- {
- return SIMD_T::div_ps(boundaryCoord0, SIMD_T::sub_ps(boundaryCoord0, boundaryCoord1));
- }
-
- Integer<SIMD_T>
- ComputeOffsets(uint32_t attrib, Integer<SIMD_T> const& vIndices, uint32_t component)
- {
- const uint32_t simdVertexStride = sizeof(SIMDVERTEX_T<SIMD_T>);
- const uint32_t componentStride = sizeof(Float<SIMD_T>);
- const uint32_t attribStride = sizeof(Vec4<SIMD_T>);
-
- static const OSALIGNSIMD16(uint32_t) elemOffset[16] = {
- 0 * sizeof(float),
- 1 * sizeof(float),
- 2 * sizeof(float),
- 3 * sizeof(float),
- 4 * sizeof(float),
- 5 * sizeof(float),
- 6 * sizeof(float),
- 7 * sizeof(float),
- 8 * sizeof(float),
- 9 * sizeof(float),
- 10 * sizeof(float),
- 11 * sizeof(float),
- 12 * sizeof(float),
- 13 * sizeof(float),
- 14 * sizeof(float),
- 15 * sizeof(float),
- };
-
- static_assert(sizeof(Integer<SIMD_T>) <= sizeof(elemOffset),
- "Clipper::ComputeOffsets, Increase number of element offsets.");
-
- Integer<SIMD_T> vElemOffset =
- SIMD_T::loadu_si(reinterpret_cast<const Integer<SIMD_T>*>(elemOffset));
-
- // step to the simdvertex
- Integer<SIMD_T> vOffsets =
- SIMD_T::mullo_epi32(vIndices, SIMD_T::set1_epi32(simdVertexStride));
-
- // step to the attribute and component
- vOffsets = SIMD_T::add_epi32(
- vOffsets, SIMD_T::set1_epi32(attribStride * attrib + componentStride * component));
-
- // step to the lane
- vOffsets = SIMD_T::add_epi32(vOffsets, vElemOffset);
-
- return vOffsets;
- }
-
- Float<SIMD_T> GatherComponent(const float* pBuffer,
- uint32_t attrib,
- Float<SIMD_T> const& vMask,
- Integer<SIMD_T> const& vIndices,
- uint32_t component)
- {
- Integer<SIMD_T> vOffsets = ComputeOffsets(attrib, vIndices, component);
- Float<SIMD_T> vSrc = SIMD_T::setzero_ps();
-
- return SIMD_T::mask_i32gather_ps(vSrc, pBuffer, vOffsets, vMask);
- }
-
- void ScatterComponent(const float* pBuffer,
- uint32_t attrib,
- Float<SIMD_T> const& vMask,
- Integer<SIMD_T> const& vIndices,
- uint32_t component,
- Float<SIMD_T> const& vSrc)
- {
- Integer<SIMD_T> vOffsets = ComputeOffsets(attrib, vIndices, component);
-
- const uint32_t* pOffsets = reinterpret_cast<const uint32_t*>(&vOffsets);
- const float* pSrc = reinterpret_cast<const float*>(&vSrc);
- uint32_t mask = SIMD_T::movemask_ps(vMask);
- unsigned long lane;
- while (_BitScanForward(&lane, mask))
- {
- mask &= ~(1 << lane);
- const uint8_t* pBuf = reinterpret_cast<const uint8_t*>(pBuffer) + pOffsets[lane];
- *(float*)pBuf = pSrc[lane];
- }
- }
-
- template <SWR_CLIPCODES ClippingPlane>
- void intersect(const Float<SIMD_T>& vActiveMask, // active lanes to operate on
- const Integer<SIMD_T>& s, // index to first edge vertex v0 in pInPts.
- const Integer<SIMD_T>& p, // index to second edge vertex v1 in pInPts.
- const Vec4<SIMD_T>& v1, // vertex 0 position
- const Vec4<SIMD_T>& v2, // vertex 1 position
- Integer<SIMD_T>& outIndex, // output index.
- const float* pInVerts, // array of all the input positions.
- uint32_t numInAttribs, // number of attributes per vertex.
- float* pOutVerts) // array of output positions. We'll write our new intersection
- // point at i*4.
- {
- uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
- uint32_t vertexClipCullOffset = this->state.backendState.vertexClipCullOffset;
-
- // compute interpolation factor
- Float<SIMD_T> t;
- switch (ClippingPlane)
- {
- case FRUSTUM_LEFT:
- t = ComputeInterpFactor(SIMD_T::add_ps(v1[3], v1[0]), SIMD_T::add_ps(v2[3], v2[0]));
- break;
- case FRUSTUM_RIGHT:
- t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[0]), SIMD_T::sub_ps(v2[3], v2[0]));
- break;
- case FRUSTUM_TOP:
- t = ComputeInterpFactor(SIMD_T::add_ps(v1[3], v1[1]), SIMD_T::add_ps(v2[3], v2[1]));
- break;
- case FRUSTUM_BOTTOM:
- t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[1]), SIMD_T::sub_ps(v2[3], v2[1]));
- break;
- case FRUSTUM_NEAR:
- // DX Znear plane is 0, GL is -w
- if (this->state.rastState.clipHalfZ)
- {
- t = ComputeInterpFactor(v1[2], v2[2]);
- }
- else
- {
- t = ComputeInterpFactor(SIMD_T::add_ps(v1[3], v1[2]), SIMD_T::add_ps(v2[3], v2[2]));
- }
- break;
- case FRUSTUM_FAR:
- t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[2]), SIMD_T::sub_ps(v2[3], v2[2]));
- break;
- default:
- SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
- };
-
- // interpolate position and store
- for (uint32_t c = 0; c < 4; ++c)
- {
- Float<SIMD_T> vOutPos = SIMD_T::fmadd_ps(SIMD_T::sub_ps(v2[c], v1[c]), t, v1[c]);
- ScatterComponent(pOutVerts, VERTEX_POSITION_SLOT, vActiveMask, outIndex, c, vOutPos);
- }
-
- // interpolate attributes and store
- for (uint32_t a = 0; a < numInAttribs; ++a)
- {
- uint32_t attribSlot = vertexAttribOffset + a;
- for (uint32_t c = 0; c < 4; ++c)
- {
- Float<SIMD_T> vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
- Float<SIMD_T> vAttrib1 = GatherComponent(pInVerts, attribSlot, vActiveMask, p, c);
- Float<SIMD_T> vOutAttrib =
- SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
- ScatterComponent(pOutVerts, attribSlot, vActiveMask, outIndex, c, vOutAttrib);
- }
- }
-
- // interpolate clip distance if enabled
- if (this->state.backendState.clipDistanceMask & 0xf)
- {
- uint32_t attribSlot = vertexClipCullOffset;
- for (uint32_t c = 0; c < 4; ++c)
- {
- Float<SIMD_T> vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
- Float<SIMD_T> vAttrib1 = GatherComponent(pInVerts, attribSlot, vActiveMask, p, c);
- Float<SIMD_T> vOutAttrib =
- SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
- ScatterComponent(pOutVerts, attribSlot, vActiveMask, outIndex, c, vOutAttrib);
- }
- }
-
- if (this->state.backendState.clipDistanceMask & 0xf0)
- {
- uint32_t attribSlot = vertexClipCullOffset + 1;
- for (uint32_t c = 0; c < 4; ++c)
- {
- Float<SIMD_T> vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
- Float<SIMD_T> vAttrib1 = GatherComponent(pInVerts, attribSlot, vActiveMask, p, c);
- Float<SIMD_T> vOutAttrib =
- SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
- ScatterComponent(pOutVerts, attribSlot, vActiveMask, outIndex, c, vOutAttrib);
- }
- }
- }
-
- template <SWR_CLIPCODES ClippingPlane>
- Float<SIMD_T> inside(const Vec4<SIMD_T>& v)
- {
- switch (ClippingPlane)
- {
- case FRUSTUM_LEFT:
- return SIMD_T::cmpge_ps(v[0], SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
- case FRUSTUM_RIGHT:
- return SIMD_T::cmple_ps(v[0], v[3]);
- case FRUSTUM_TOP:
- return SIMD_T::cmpge_ps(v[1], SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
- case FRUSTUM_BOTTOM:
- return SIMD_T::cmple_ps(v[1], v[3]);
- case FRUSTUM_NEAR:
- return SIMD_T::cmpge_ps(v[2],
- this->state.rastState.clipHalfZ
- ? SIMD_T::setzero_ps()
- : SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
- case FRUSTUM_FAR:
- return SIMD_T::cmple_ps(v[2], v[3]);
- default:
- SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
- return SIMD_T::setzero_ps();
- }
- }
-
- template <SWR_CLIPCODES ClippingPlane>
- Integer<SIMD_T> ClipTriToPlane(const float* pInVerts,
- const Integer<SIMD_T>& vNumInPts,
- uint32_t numInAttribs,
- float* pOutVerts)
- {
- uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
-
- Integer<SIMD_T> vCurIndex = SIMD_T::setzero_si();
- Integer<SIMD_T> vOutIndex = SIMD_T::setzero_si();
- Float<SIMD_T> vActiveMask = SIMD_T::castsi_ps(SIMD_T::cmplt_epi32(vCurIndex, vNumInPts));
-
- while (!SIMD_T::testz_ps(vActiveMask, vActiveMask)) // loop until activeMask is empty
- {
- Integer<SIMD_T> s = vCurIndex;
- Integer<SIMD_T> p = SIMD_T::add_epi32(s, SIMD_T::set1_epi32(1));
- Integer<SIMD_T> underFlowMask = SIMD_T::cmpgt_epi32(vNumInPts, p);
- p = SIMD_T::castps_si(SIMD_T::blendv_ps(
- SIMD_T::setzero_ps(), SIMD_T::castsi_ps(p), SIMD_T::castsi_ps(underFlowMask)));
-
- // gather position
- Vec4<SIMD_T> vInPos0, vInPos1;
- for (uint32_t c = 0; c < 4; ++c)
- {
- vInPos0[c] = GatherComponent(pInVerts, VERTEX_POSITION_SLOT, vActiveMask, s, c);
- vInPos1[c] = GatherComponent(pInVerts, VERTEX_POSITION_SLOT, vActiveMask, p, c);
- }
-
- // compute inside mask
- Float<SIMD_T> s_in = inside<ClippingPlane>(vInPos0);
- Float<SIMD_T> p_in = inside<ClippingPlane>(vInPos1);
-
- // compute intersection mask (s_in != p_in)
- Float<SIMD_T> intersectMask = SIMD_T::xor_ps(s_in, p_in);
- intersectMask = SIMD_T::and_ps(intersectMask, vActiveMask);
-
- // store s if inside
- s_in = SIMD_T::and_ps(s_in, vActiveMask);
- if (!SIMD_T::testz_ps(s_in, s_in))
- {
- // store position
- for (uint32_t c = 0; c < 4; ++c)
- {
- ScatterComponent(
- pOutVerts, VERTEX_POSITION_SLOT, s_in, vOutIndex, c, vInPos0[c]);
- }
-
- // store attribs
- for (uint32_t a = 0; a < numInAttribs; ++a)
- {
- uint32_t attribSlot = vertexAttribOffset + a;
- for (uint32_t c = 0; c < 4; ++c)
- {
- Float<SIMD_T> vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
- ScatterComponent(pOutVerts, attribSlot, s_in, vOutIndex, c, vAttrib);
- }
- }
-
- // store clip distance if enabled
- uint32_t vertexClipCullSlot = this->state.backendState.vertexClipCullOffset;
- if (this->state.backendState.clipDistanceMask & 0xf)
- {
- uint32_t attribSlot = vertexClipCullSlot;
- for (uint32_t c = 0; c < 4; ++c)
- {
- Float<SIMD_T> vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
- ScatterComponent(pOutVerts, attribSlot, s_in, vOutIndex, c, vAttrib);
- }
- }
-
- if (this->state.backendState.clipDistanceMask & 0xf0)
- {
- uint32_t attribSlot = vertexClipCullSlot + 1;
- for (uint32_t c = 0; c < 4; ++c)
- {
- Float<SIMD_T> vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
- ScatterComponent(pOutVerts, attribSlot, s_in, vOutIndex, c, vAttrib);
- }
- }
-
- // increment outIndex
- vOutIndex = SIMD_T::blendv_epi32(
- vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), s_in);
- }
-
- // compute and store intersection
- if (!SIMD_T::testz_ps(intersectMask, intersectMask))
- {
- intersect<ClippingPlane>(intersectMask,
- s,
- p,
- vInPos0,
- vInPos1,
- vOutIndex,
- pInVerts,
- numInAttribs,
- pOutVerts);
-
- // increment outIndex for active lanes
- vOutIndex = SIMD_T::blendv_epi32(
- vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), intersectMask);
- }
-
- // increment loop index and update active mask
- vCurIndex = SIMD_T::add_epi32(vCurIndex, SIMD_T::set1_epi32(1));
- vActiveMask = SIMD_T::castsi_ps(SIMD_T::cmplt_epi32(vCurIndex, vNumInPts));
- }
-
- return vOutIndex;
- }
-
- template <SWR_CLIPCODES ClippingPlane>
- Integer<SIMD_T> ClipLineToPlane(const float* pInVerts,
- const Integer<SIMD_T>& vNumInPts,
- uint32_t numInAttribs,
- float* pOutVerts)
- {
- uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
-
- Integer<SIMD_T> vCurIndex = SIMD_T::setzero_si();
- Integer<SIMD_T> vOutIndex = SIMD_T::setzero_si();
- Float<SIMD_T> vActiveMask = SIMD_T::castsi_ps(SIMD_T::cmplt_epi32(vCurIndex, vNumInPts));
-
- if (!SIMD_T::testz_ps(vActiveMask, vActiveMask))
- {
- Integer<SIMD_T> s = vCurIndex;
- Integer<SIMD_T> p = SIMD_T::add_epi32(s, SIMD_T::set1_epi32(1));
-
- // gather position
- Vec4<SIMD_T> vInPos0, vInPos1;
- for (uint32_t c = 0; c < 4; ++c)
- {
- vInPos0[c] = GatherComponent(pInVerts, VERTEX_POSITION_SLOT, vActiveMask, s, c);
- vInPos1[c] = GatherComponent(pInVerts, VERTEX_POSITION_SLOT, vActiveMask, p, c);
- }
-
- // compute inside mask
- Float<SIMD_T> s_in = inside<ClippingPlane>(vInPos0);
- Float<SIMD_T> p_in = inside<ClippingPlane>(vInPos1);
-
- // compute intersection mask (s_in != p_in)
- Float<SIMD_T> intersectMask = SIMD_T::xor_ps(s_in, p_in);
- intersectMask = SIMD_T::and_ps(intersectMask, vActiveMask);
-
- // store s if inside
- s_in = SIMD_T::and_ps(s_in, vActiveMask);
- if (!SIMD_T::testz_ps(s_in, s_in))
- {
- for (uint32_t c = 0; c < 4; ++c)
- {
- ScatterComponent(
- pOutVerts, VERTEX_POSITION_SLOT, s_in, vOutIndex, c, vInPos0[c]);
- }
-
- // interpolate attributes and store
- for (uint32_t a = 0; a < numInAttribs; ++a)
- {
- uint32_t attribSlot = vertexAttribOffset + a;
- for (uint32_t c = 0; c < 4; ++c)
- {
- Float<SIMD_T> vAttrib = GatherComponent(pInVerts, attribSlot, s_in, s, c);
- ScatterComponent(pOutVerts, attribSlot, s_in, vOutIndex, c, vAttrib);
- }
- }
-
- // increment outIndex
- vOutIndex = SIMD_T::blendv_epi32(
- vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), s_in);
- }
-
- // compute and store intersection
- if (!SIMD_T::testz_ps(intersectMask, intersectMask))
- {
- intersect<ClippingPlane>(intersectMask,
- s,
- p,
- vInPos0,
- vInPos1,
- vOutIndex,
- pInVerts,
- numInAttribs,
- pOutVerts);
-
- // increment outIndex for active lanes
- vOutIndex = SIMD_T::blendv_epi32(
- vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), intersectMask);
- }
-
- // store p if inside
- p_in = SIMD_T::and_ps(p_in, vActiveMask);
- if (!SIMD_T::testz_ps(p_in, p_in))
- {
- for (uint32_t c = 0; c < 4; ++c)
- {
- ScatterComponent(
- pOutVerts, VERTEX_POSITION_SLOT, p_in, vOutIndex, c, vInPos1[c]);
- }
-
- // interpolate attributes and store
- for (uint32_t a = 0; a < numInAttribs; ++a)
- {
- uint32_t attribSlot = vertexAttribOffset + a;
- for (uint32_t c = 0; c < 4; ++c)
- {
- Float<SIMD_T> vAttrib = GatherComponent(pInVerts, attribSlot, p_in, p, c);
- ScatterComponent(pOutVerts, attribSlot, p_in, vOutIndex, c, vAttrib);
- }
- }
-
- // increment outIndex
- vOutIndex = SIMD_T::blendv_epi32(
- vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), p_in);
- }
- }
-
- return vOutIndex;
- }
-
- Integer<SIMD_T> ClipPrims(float* pVertices,
- const Float<SIMD_T>& vPrimMask,
- const Float<SIMD_T>& vClipMask,
- int numAttribs)
- {
- // temp storage
- float* pTempVerts = reinterpret_cast<float*>(this->tmpVerts);
-
- // zero out num input verts for non-active lanes
- Integer<SIMD_T> vNumInPts = SIMD_T::set1_epi32(NumVertsPerPrimT);
- vNumInPts = SIMD_T::blendv_epi32(SIMD_T::setzero_si(), vNumInPts, vClipMask);
-
- // clip prims to frustum
- Integer<SIMD_T> vNumOutPts;
- if (NumVertsPerPrimT == 3)
- {
- vNumOutPts = ClipTriToPlane<FRUSTUM_NEAR>(pVertices, vNumInPts, numAttribs, pTempVerts);
- vNumOutPts = ClipTriToPlane<FRUSTUM_FAR>(pTempVerts, vNumOutPts, numAttribs, pVertices);
- vNumOutPts =
- ClipTriToPlane<FRUSTUM_LEFT>(pVertices, vNumOutPts, numAttribs, pTempVerts);
- vNumOutPts =
- ClipTriToPlane<FRUSTUM_RIGHT>(pTempVerts, vNumOutPts, numAttribs, pVertices);
- vNumOutPts =
- ClipTriToPlane<FRUSTUM_BOTTOM>(pVertices, vNumOutPts, numAttribs, pTempVerts);
- vNumOutPts = ClipTriToPlane<FRUSTUM_TOP>(pTempVerts, vNumOutPts, numAttribs, pVertices);
- }
- else
- {
- SWR_ASSERT(NumVertsPerPrimT == 2);
- vNumOutPts =
- ClipLineToPlane<FRUSTUM_NEAR>(pVertices, vNumInPts, numAttribs, pTempVerts);
- vNumOutPts =
- ClipLineToPlane<FRUSTUM_FAR>(pTempVerts, vNumOutPts, numAttribs, pVertices);
- vNumOutPts =
- ClipLineToPlane<FRUSTUM_LEFT>(pVertices, vNumOutPts, numAttribs, pTempVerts);
- vNumOutPts =
- ClipLineToPlane<FRUSTUM_RIGHT>(pTempVerts, vNumOutPts, numAttribs, pVertices);
- vNumOutPts =
- ClipLineToPlane<FRUSTUM_BOTTOM>(pVertices, vNumOutPts, numAttribs, pTempVerts);
- vNumOutPts =
- ClipLineToPlane<FRUSTUM_TOP>(pTempVerts, vNumOutPts, numAttribs, pVertices);
- }
-
- // restore num verts for non-clipped, active lanes
- Float<SIMD_T> vNonClippedMask = SIMD_T::andnot_ps(vClipMask, vPrimMask);
- vNumOutPts =
- SIMD_T::blendv_epi32(vNumOutPts, SIMD_T::set1_epi32(NumVertsPerPrimT), vNonClippedMask);
-
- return vNumOutPts;
- }
-
- const uint32_t workerId{0};
- DRAW_CONTEXT* pDC{nullptr};
- const API_STATE& state;
- Float<SIMD_T> clipCodes[NumVertsPerPrimT];
- SIMDVERTEX_T<SIMD_T>* clippedVerts;
- SIMDVERTEX_T<SIMD_T>* tmpVerts;
- SIMDVERTEX_T<SIMD_T>* transposedVerts;
-};
-
-// pipeline stage functions
-void ClipRectangles(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simdvector prims[],
- uint32_t primMask,
- simdscalari const& primId,
- simdscalari const& viewportIdx,
- simdscalari const& rtIdx);
-void ClipTriangles(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simdvector prims[],
- uint32_t primMask,
- simdscalari const& primId,
- simdscalari const& viewportIdx,
- simdscalari const& rtIdx);
-void ClipLines(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simdvector prims[],
- uint32_t primMask,
- simdscalari const& primId,
- simdscalari const& viewportIdx,
- simdscalari const& rtIdx);
-void ClipPoints(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simdvector prims[],
- uint32_t primMask,
- simdscalari const& primId,
- simdscalari const& viewportIdx,
- simdscalari const& rtIdx);
-#if USE_SIMD16_FRONTEND
-void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simd16vector prims[],
- uint32_t primMask,
- simd16scalari const& primId,
- simd16scalari const& viewportIdx,
- simd16scalari const& rtIdx);
-void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simd16vector prims[],
- uint32_t primMask,
- simd16scalari const& primId,
- simd16scalari const& viewportIdx,
- simd16scalari const& rtIdx);
-void SIMDCALL ClipLines_simd16(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simd16vector prims[],
- uint32_t primMask,
- simd16scalari const& primId,
- simd16scalari const& viewportIdx,
- simd16scalari const& rtIdx);
-void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simd16vector prims[],
- uint32_t primMask,
- simd16scalari const& primId,
- simd16scalari const& viewportIdx,
- simd16scalari const& rtIdx);
-#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h b/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h
deleted file mode 100644
index 9e7f96cdeac..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h
+++ /dev/null
@@ -1,229 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file conservativerast.h
- *
- ******************************************************************************/
-#pragma once
-#include <type_traits>
-#include "common/simdintrin.h"
-
-enum FixedPointFmt
-{
- FP_UNINIT,
- _16_8,
- _16_9,
- _X_16,
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief convenience typedefs for supported Fixed Point precisions
-typedef std::integral_constant<uint32_t, FP_UNINIT> Fixed_Uninit;
-typedef std::integral_constant<uint32_t, _16_8> Fixed_16_8;
-typedef std::integral_constant<uint32_t, _16_9> Fixed_16_9;
-typedef std::integral_constant<uint32_t, _X_16> Fixed_X_16;
-
-//////////////////////////////////////////////////////////////////////////
-/// @struct FixedPointTraits
-/// @brief holds constants relating to converting between FP and Fixed point
-/// @tparam FT: fixed precision type
-template <typename FT>
-struct FixedPointTraits
-{
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Fixed_16_8 specialization of FixedPointTraits
-template <>
-struct FixedPointTraits<Fixed_16_8>
-{
- /// multiplier to go from FP32 to Fixed Point 16.8
- typedef std::integral_constant<uint32_t, 256> ScaleT;
- /// number of bits to shift to go from 16.8 fixed => int32
- typedef std::integral_constant<uint32_t, 8> BitsT;
- typedef Fixed_16_8 TypeT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Fixed_16_9 specialization of FixedPointTraits
-template <>
-struct FixedPointTraits<Fixed_16_9>
-{
- /// multiplier to go from FP32 to Fixed Point 16.9
- typedef std::integral_constant<uint32_t, 512> ScaleT;
- /// number of bits to shift to go from 16.9 fixed => int32
- typedef std::integral_constant<uint32_t, 9> BitsT;
- typedef Fixed_16_9 TypeT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Fixed_16_9 specialization of FixedPointTraits
-template <>
-struct FixedPointTraits<Fixed_X_16>
-{
- /// multiplier to go from FP32 to Fixed Point X.16
- typedef std::integral_constant<uint32_t, 65536> ScaleT;
- /// number of bits to shift to go from X.16 fixed => int32
- typedef std::integral_constant<uint32_t, 16> BitsT;
- typedef Fixed_X_16 TypeT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief convenience typedefs for conservative rasterization modes
-typedef std::false_type StandardRastT;
-typedef std::true_type ConservativeRastT;
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief convenience typedefs for Input Coverage rasterization modes
-typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NONE> NoInputCoverageT;
-typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NORMAL> OuterConservativeCoverageT;
-typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>
- InnerConservativeCoverageT;
-
-//////////////////////////////////////////////////////////////////////////
-/// @struct ConservativeRastTraits
-/// @brief primary ConservativeRastTraits template. Shouldn't be instantiated
-/// @tparam ConservativeT: type of conservative rasterization
-template <typename ConservativeT>
-struct ConservativeRastFETraits
-{
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief StandardRast specialization of ConservativeRastTraits
-template <>
-struct ConservativeRastFETraits<StandardRastT>
-{
- typedef std::false_type IsConservativeT;
- typedef std::integral_constant<uint32_t, 0> BoundingBoxOffsetT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief ConservativeRastT specialization of ConservativeRastTraits
-template <>
-struct ConservativeRastFETraits<ConservativeRastT>
-{
- typedef std::true_type IsConservativeT;
- typedef std::integral_constant<uint32_t, 1> BoundingBoxOffsetT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief convenience typedefs for ConservativeRastFETraits
-typedef ConservativeRastFETraits<StandardRastT> FEStandardRastT;
-typedef ConservativeRastFETraits<ConservativeRastT> FEConservativeRastT;
-
-//////////////////////////////////////////////////////////////////////////
-/// @struct ConservativeRastBETraits
-/// @brief primary ConservativeRastBETraits template. Shouldn't be instantiated;
-/// default to standard rasterization behavior
-/// @tparam ConservativeT: type of conservative rasterization
-/// @tparam InputCoverageT: type of input coverage requested, if any
-template <typename ConservativeT, typename _InputCoverageT>
-struct ConservativeRastBETraits
-{
- typedef std::false_type IsConservativeT;
- typedef _InputCoverageT InputCoverageT;
- typedef FixedPointTraits<Fixed_16_8> ConservativePrecisionT;
- typedef std::integral_constant<int32_t, 0> ConservativeEdgeOffsetT;
- typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief StandardRastT specialization of ConservativeRastBETraits
-template <typename _InputCoverageT>
-struct ConservativeRastBETraits<StandardRastT, _InputCoverageT>
-{
- typedef std::false_type IsConservativeT;
- typedef _InputCoverageT InputCoverageT;
- typedef FixedPointTraits<Fixed_16_8> ConservativePrecisionT;
- typedef std::integral_constant<int32_t, 0> ConservativeEdgeOffsetT;
- typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief ConservativeRastT specialization of ConservativeRastBETraits
-/// with no input coverage
-template <>
-struct ConservativeRastBETraits<ConservativeRastT, NoInputCoverageT>
-{
- typedef std::true_type IsConservativeT;
- typedef NoInputCoverageT InputCoverageT;
-
- typedef FixedPointTraits<Fixed_16_9> ConservativePrecisionT;
-
- /// offset edge away from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
- /// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead
- /// of of having to compare individual edges to pixel corners to check if any part of the
- /// triangle intersects a pixel
- typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value / 2) + 1>
- ConservativeEdgeOffsetT;
- typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief ConservativeRastT specialization of ConservativeRastBETraits
-/// with OuterConservativeCoverage
-template <>
-struct ConservativeRastBETraits<ConservativeRastT, OuterConservativeCoverageT>
-{
- typedef std::true_type IsConservativeT;
- typedef OuterConservativeCoverageT InputCoverageT;
-
- typedef FixedPointTraits<Fixed_16_9> ConservativePrecisionT;
-
- /// offset edge away from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
- /// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead
- /// of of having to compare individual edges to pixel corners to check if any part of the
- /// triangle intersects a pixel
- typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value / 2) + 1>
- ConservativeEdgeOffsetT;
- typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief ConservativeRastT specialization of ConservativeRastBETraits
-/// with InnerConservativeCoverage
-template <>
-struct ConservativeRastBETraits<ConservativeRastT, InnerConservativeCoverageT>
-{
- typedef std::true_type IsConservativeT;
- typedef InnerConservativeCoverageT InputCoverageT;
-
- typedef FixedPointTraits<Fixed_16_9> ConservativePrecisionT;
-
- /// offset edge away from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
- /// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead
- /// of of having to compare individual edges to pixel corners to check if any part of the
- /// triangle intersects a pixel
- typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value / 2) + 1>
- ConservativeEdgeOffsetT;
-
- /// undo the outer conservative offset and offset edge towards from pixel center by 1/2 pixel +
- /// 1/512, in Fixed 16.9 precision this allows the rasterizer to do the 3 edge coverage tests
- /// against a single point, instead of of having to compare individual edges to pixel corners to
- /// check if a pixel is fully covered by a triangle
- typedef std::integral_constant<int32_t,
- static_cast<int32_t>(
- -((ConservativePrecisionT::ScaleT::value / 2) + 1) -
- ConservativeEdgeOffsetT::value)>
- InnerConservativeEdgeOffsetT;
-}; \ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
deleted file mode 100644
index b874520b9d8..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ /dev/null
@@ -1,608 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file context.h
- *
- * @brief Definitions for SWR_CONTEXT and DRAW_CONTEXT
- * The SWR_CONTEXT is our global context and contains the DC ring,
- * thread state, etc.
- *
- * The DRAW_CONTEXT contains all state associated with a draw operation.
- *
- ******************************************************************************/
-#pragma once
-
-#include <condition_variable>
-#include <algorithm>
-
-#include "core/api.h"
-#include "core/utils.h"
-#include "core/arena.h"
-#include "core/fifo.hpp"
-#include "core/knobs.h"
-#include "common/intrin.h"
-#include "common/rdtsc_buckets.h"
-#include "core/threads.h"
-#include "ringbuffer.h"
-#include "archrast/archrast.h"
-
-// x.8 fixed point precision values
-#define FIXED_POINT_SHIFT 8
-#define FIXED_POINT_SCALE 256
-
-// x.16 fixed point precision values
-#define FIXED_POINT16_SHIFT 16
-#define FIXED_POINT16_SCALE 65536
-
-struct SWR_CONTEXT;
-struct DRAW_CONTEXT;
-
-struct TRI_FLAGS
-{
- uint32_t frontFacing : 1;
- uint32_t yMajor : 1;
- uint32_t coverageMask : (SIMD_TILE_X_DIM* SIMD_TILE_Y_DIM);
- uint32_t reserved : 32 - 1 - 1 - (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM);
- float pointSize;
- uint32_t renderTargetArrayIndex;
- uint32_t viewportIndex;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_TRIANGLE_DESC
-/////////////////////////////////////////////////////////////////////////
-struct SWR_TRIANGLE_DESC
-{
- float I[3];
- float J[3];
- float Z[3];
- float OneOverW[3];
- float recipDet;
-
- float* pRecipW;
- float* pAttribs;
- float* pPerspAttribs;
- float* pSamplePos;
- float* pUserClipBuffer;
-
- uint64_t coverageMask[SWR_MAX_NUM_MULTISAMPLES];
- uint64_t innerCoverageMask; // Conservative rasterization inner coverage: marked covered if
- // entire pixel is covered
- uint64_t anyCoveredSamples;
-
- TRI_FLAGS triFlags;
-};
-
-struct TRIANGLE_WORK_DESC
-{
- float* pTriBuffer;
- float* pAttribs;
- float* pUserClipBuffer;
- uint32_t numAttribs;
- TRI_FLAGS triFlags;
-};
-
-struct CLEAR_DESC
-{
- SWR_RECT rect;
- uint32_t attachmentMask;
- uint32_t renderTargetArrayIndex;
- float clearRTColor[4]; // RGBA_32F
- float clearDepth; // [0..1]
- uint8_t clearStencil;
-};
-
-struct DISCARD_INVALIDATE_TILES_DESC
-{
- uint32_t attachmentMask;
- SWR_RECT rect;
- SWR_TILE_STATE newTileState;
- bool createNewTiles;
- bool fullTilesOnly;
-};
-
-struct SYNC_DESC
-{
- PFN_CALLBACK_FUNC pfnCallbackFunc;
- uint64_t userData;
- uint64_t userData2;
- uint64_t userData3;
-};
-
-struct STORE_TILES_DESC
-{
- uint32_t attachmentMask;
- SWR_TILE_STATE postStoreTileState;
- SWR_RECT rect;
-};
-
-struct COMPUTE_DESC
-{
- uint32_t threadGroupCountX;
- uint32_t threadGroupCountY;
- uint32_t threadGroupCountZ;
- bool enableThreadDispatch;
-};
-
-typedef void (*PFN_WORK_FUNC)(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t macroTile,
- void* pDesc);
-
-enum WORK_TYPE
-{
- SYNC,
- DRAW,
- CLEAR,
- DISCARDINVALIDATETILES,
- STORETILES,
- SHUTDOWN,
-};
-
-OSALIGNSIMD(struct) BE_WORK
-{
- WORK_TYPE type;
- PFN_WORK_FUNC pfnWork;
- union
- {
- SYNC_DESC sync;
- TRIANGLE_WORK_DESC tri;
- CLEAR_DESC clear;
- DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
- STORE_TILES_DESC storeTiles;
- } desc;
-};
-
-struct DRAW_WORK
-{
- DRAW_CONTEXT* pDC;
- union
- {
- uint32_t numIndices; // DrawIndexed: Number of indices for draw.
- uint32_t numVerts; // Draw: Number of verts (triangles, lines, etc)
- };
- union
- {
- gfxptr_t xpIB; // DrawIndexed: App supplied int32 indices
- uint32_t startVertex; // Draw: Starting vertex in VB to render from.
- };
- int32_t baseVertex;
- uint32_t numInstances; // Number of instances
- uint32_t startInstance; // Instance offset
- uint32_t startPrimID; // starting primitiveID for this draw batch
- uint32_t
- startVertexID; // starting VertexID for this draw batch (only needed for non-indexed draws)
- SWR_FORMAT type; // index buffer type
-};
-
-typedef void (*PFN_FE_WORK_FUNC)(SWR_CONTEXT* pContext,
- DRAW_CONTEXT* pDC,
- uint32_t workerId,
- void* pDesc);
-struct FE_WORK
-{
- WORK_TYPE type;
- PFN_FE_WORK_FUNC pfnWork;
- union
- {
- SYNC_DESC sync;
- DRAW_WORK draw;
- CLEAR_DESC clear;
- DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
- STORE_TILES_DESC storeTiles;
- } desc;
-};
-
-struct GUARDBANDS
-{
- float left[KNOB_NUM_VIEWPORTS_SCISSORS];
- float right[KNOB_NUM_VIEWPORTS_SCISSORS];
- float top[KNOB_NUM_VIEWPORTS_SCISSORS];
- float bottom[KNOB_NUM_VIEWPORTS_SCISSORS];
-};
-
-struct PA_STATE;
-
-// function signature for pipeline stages that execute after primitive assembly
-typedef void (*PFN_PROCESS_PRIMS)(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simdvector prims[],
- uint32_t primMask,
- simdscalari const& primID,
- simdscalari const& viewportIdx,
- simdscalari const& rtIdx);
-
-// function signature for pipeline stages that execute after primitive assembly
-typedef void(SIMDCALL* PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simd16vector prims[],
- uint32_t primMask,
- simd16scalari const& primID,
- simd16scalari const& viewportIdx,
- simd16scalari const& rtIdx);
-
-OSALIGNLINE(struct) API_STATE
-{
- // Vertex Buffers
- SWR_VERTEX_BUFFER_STATE vertexBuffers[KNOB_NUM_STREAMS];
-
- // GS - Geometry Shader State
- SWR_GS_STATE gsState;
- PFN_GS_FUNC pfnGsFunc;
-
- // FS - Fetch Shader State
- PFN_FETCH_FUNC pfnFetchFunc;
-
- // VS - Vertex Shader State
- PFN_VERTEX_FUNC pfnVertexFunc;
-
- // Index Buffer
- SWR_INDEX_BUFFER_STATE indexBuffer;
-
- // CS - Compute Shader
- PFN_CS_FUNC pfnCsFunc;
- uint32_t totalThreadsInGroup;
- uint32_t totalSpillFillSize;
- uint32_t scratchSpaceSizePerWarp;
- uint32_t scratchSpaceNumWarps;
-
- // FE - Frontend State
- SWR_FRONTEND_STATE frontendState;
-
- // SOS - Streamout Shader State
- PFN_SO_FUNC pfnSoFunc[MAX_SO_STREAMS];
-
- // Streamout state
- SWR_STREAMOUT_STATE soState;
- mutable SWR_STREAMOUT_BUFFER soBuffer[MAX_SO_STREAMS];
- mutable SWR_STREAMOUT_BUFFER soPausedBuffer[MAX_SO_STREAMS];
-
- // Tessellation State
- PFN_HS_FUNC pfnHsFunc;
- PFN_DS_FUNC pfnDsFunc;
- SWR_TS_STATE tsState;
-
- // Number of attributes used by the frontend (vs, so, gs)
- uint32_t feNumAttributes;
-
- // RS - Rasterizer State
- SWR_RASTSTATE rastState;
- // floating point multisample offsets
- float samplePos[SWR_MAX_NUM_MULTISAMPLES * 2];
-
- GUARDBANDS gbState;
-
- SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS];
- SWR_VIEWPORT_MATRICES vpMatrices;
-
- SWR_RECT scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
- SWR_RECT scissorsInFixedPoint[KNOB_NUM_VIEWPORTS_SCISSORS];
- bool scissorsTileAligned;
-
- bool forceFront;
- PRIMITIVE_TOPOLOGY topology;
-
-
- // Backend state
- OSALIGNLINE(SWR_BACKEND_STATE) backendState;
-
- SWR_DEPTH_BOUNDS_STATE depthBoundsState;
-
- // PS - Pixel shader state
- SWR_PS_STATE psState;
-
- SWR_DEPTH_STENCIL_STATE depthStencilState;
-
- // OM - Output Merger State
- SWR_BLEND_STATE blendState;
- PFN_BLEND_JIT_FUNC pfnBlendFunc[SWR_NUM_RENDERTARGETS];
-
- struct
- {
- uint32_t enableStatsFE : 1; // Enable frontend pipeline stats
- uint32_t enableStatsBE : 1; // Enable backend pipeline stats
- uint32_t colorHottileEnable : 8; // Bitmask of enabled color hottiles
- uint32_t depthHottileEnable : 1; // Enable depth buffer hottile
- uint32_t stencilHottileEnable : 1; // Enable stencil buffer hottile
- };
-
- PFN_QUANTIZE_DEPTH pfnQuantizeDepth;
-};
-
-class MacroTileMgr;
-class DispatchQueue;
-class HOTTILE;
-
-struct RenderOutputBuffers
-{
- uint8_t* pColor[SWR_NUM_RENDERTARGETS];
- uint8_t* pDepth;
- uint8_t* pStencil;
-
- HOTTILE* pColorHotTile[SWR_NUM_RENDERTARGETS];
- HOTTILE* pDepthHotTile;
- HOTTILE* pStencilHotTile;
-};
-
-// Plane equation A/B/C coeffs used to evaluate I/J barycentric coords
-struct BarycentricCoeffs
-{
- simdscalar vIa;
- simdscalar vIb;
- simdscalar vIc;
-
- simdscalar vJa;
- simdscalar vJb;
- simdscalar vJc;
-
- simdscalar vZa;
- simdscalar vZb;
- simdscalar vZc;
-
- simdscalar vRecipDet;
-
- simdscalar vAOneOverW;
- simdscalar vBOneOverW;
- simdscalar vCOneOverW;
-};
-
-// pipeline function pointer types
-typedef void (*PFN_BACKEND_FUNC)(
- DRAW_CONTEXT*, uint32_t, uint32_t, uint32_t, SWR_TRIANGLE_DESC&, RenderOutputBuffers&);
-typedef void (*PFN_OUTPUT_MERGER)(SWR_PS_CONTEXT&,
- uint8_t* (&)[SWR_NUM_RENDERTARGETS],
- uint32_t,
- const SWR_BLEND_STATE*,
- const PFN_BLEND_JIT_FUNC (&)[SWR_NUM_RENDERTARGETS],
- simdscalar&,
- simdscalar const&);
-typedef void (*PFN_CALC_PIXEL_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&);
-typedef void (*PFN_CALC_SAMPLE_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&);
-typedef void (*PFN_CALC_CENTROID_BARYCENTRICS)(const BarycentricCoeffs&,
- SWR_PS_CONTEXT&,
- const uint64_t* const,
- const uint32_t,
- simdscalar const&,
- simdscalar const&);
-
-struct BACKEND_FUNCS
-{
- PFN_BACKEND_FUNC pfnBackend;
-};
-
-// Draw State
-struct DRAW_STATE
-{
- API_STATE state;
-
- void* pPrivateState; // Its required the driver sets this up for each draw.
-
- // pipeline function pointers, filled in by API thread when setting up the draw
- BACKEND_FUNCS backendFuncs;
- PFN_PROCESS_PRIMS pfnProcessPrims;
-#if USE_SIMD16_FRONTEND
- PFN_PROCESS_PRIMS_SIMD16 pfnProcessPrims_simd16;
-#endif
-
- CachingArena* pArena; // This should only be used by API thread.
-};
-
-struct DRAW_DYNAMIC_STATE
-{
- void Reset(uint32_t numThreads)
- {
- SWR_STATS* pSavePtr = pStats;
- memset(this, 0, sizeof(*this));
- pStats = pSavePtr;
- memset(pStats, 0, sizeof(SWR_STATS) * numThreads);
- }
- ///@todo Currently assumes only a single FE can do stream output for a draw.
- uint32_t SoWriteOffset[4];
- bool SoWriteOffsetDirty[4];
-
- SWR_STATS_FE statsFE; // Only one FE thread per DC.
- SWR_STATS* pStats;
- uint64_t soPrims; // number of primitives written to StreamOut buffer
-};
-
-// Draw Context
-// The api thread sets up a draw context that exists for the life of the draw.
-// This draw context maintains all of the state needed for the draw operation.
-struct DRAW_CONTEXT
-{
- SWR_CONTEXT* pContext;
- union
- {
- MacroTileMgr* pTileMgr;
- DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
- };
- DRAW_STATE* pState; // Read-only state. Core should not update this outside of API thread.
- CachingArena* pArena;
-
- uint32_t drawId;
- bool dependentFE; // Frontend work is dependent on all previous FE
- bool dependent; // Backend work is dependent on all previous BE
- bool isCompute; // Is this DC a compute context?
- bool cleanupState; // True if this is the last draw using an entry in the state ring.
-
- FE_WORK FeWork;
-
- SYNC_DESC retireCallback; // Call this func when this DC is retired.
-
- DRAW_DYNAMIC_STATE dynState;
-
- volatile OSALIGNLINE(bool) doneFE; // Is FE work done for this draw?
- volatile OSALIGNLINE(uint32_t) FeLock;
- volatile OSALIGNLINE(uint32_t) threadsDone;
-};
-
-static_assert((sizeof(DRAW_CONTEXT) & 63) == 0, "Invalid size for DRAW_CONTEXT");
-
-INLINE const API_STATE& GetApiState(const DRAW_CONTEXT* pDC)
-{
- SWR_ASSERT(pDC != nullptr);
- SWR_ASSERT(pDC->pState != nullptr);
-
- return pDC->pState->state;
-}
-
-INLINE void* GetPrivateState(const DRAW_CONTEXT* pDC)
-{
- SWR_ASSERT(pDC != nullptr);
- SWR_ASSERT(pDC->pState != nullptr);
-
- return pDC->pState->pPrivateState;
-}
-
-class HotTileMgr;
-
-struct SWR_CONTEXT
-{
- // Draw Context Ring
- // Each draw needs its own state in order to support multiple draws in flight across multiple
- // threads. We maintain N draw contexts configured as a ring. The size of the ring limits the
- // maximum number of draws that can be in flight at any given time.
- //
- // Description:
- // 1. State - When an application first sets state we'll request a new draw context to use.
- // a. If there are no available draw contexts then we'll have to wait until one becomes
- // free. b. If one is available then set pCurDrawContext to point to it and mark it in use.
- // c. All state calls set state on pCurDrawContext.
- // 2. Draw - Creates submits a work item that is associated with current draw context.
- // a. Set pPrevDrawContext = pCurDrawContext
- // b. Set pCurDrawContext to NULL.
- // 3. State - When an applications sets state after draw
- // a. Same as step 1.
- // b. State is copied from prev draw context to current.
- RingBuffer<DRAW_CONTEXT> dcRing;
-
- DRAW_CONTEXT* pCurDrawContext; // This points to DC entry in ring for an unsubmitted draw.
- DRAW_CONTEXT* pPrevDrawContext; // This points to DC entry for the previous context submitted
- // that we can copy state from.
-
- MacroTileMgr* pMacroTileManagerArray;
- DispatchQueue* pDispatchQueueArray;
-
- // Draw State Ring
- // When draw are very large (lots of primitives) then the API thread will break these up.
- // These split draws all have identical state. So instead of storing the state directly
- // in the Draw Context (DC) we instead store it in a Draw State (DS). This allows multiple DCs
- // to reference a single entry in the DS ring.
- RingBuffer<DRAW_STATE> dsRing;
-
- uint32_t curStateId; // Current index to the next available entry in the DS ring.
-
- uint32_t NumWorkerThreads;
- uint32_t NumFEThreads;
- uint32_t NumBEThreads;
-
- THREAD_POOL threadPool; // Thread pool associated with this context
- SWR_THREADING_INFO threadInfo;
- SWR_API_THREADING_INFO apiThreadInfo;
- SWR_WORKER_PRIVATE_STATE workerPrivateState;
-
- uint32_t MAX_DRAWS_IN_FLIGHT;
-
- std::condition_variable FifosNotEmpty;
- std::mutex WaitLock;
-
- uint32_t privateStateSize;
-
- HotTileMgr* pHotTileMgr;
-
- // Callback functions, passed in at create context time
- PFN_LOAD_TILE pfnLoadTile;
- PFN_STORE_TILE pfnStoreTile;
- PFN_TRANSLATE_GFXPTR_FOR_READ pfnTranslateGfxptrForRead;
- PFN_TRANSLATE_GFXPTR_FOR_WRITE pfnTranslateGfxptrForWrite;
- PFN_MAKE_GFXPTR pfnMakeGfxPtr;
- PFN_CREATE_MEMORY_CONTEXT pfnCreateMemoryContext;
- PFN_DESTROY_MEMORY_CONTEXT pfnDestroyMemoryContext;
- PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
- PFN_UPDATE_STATS pfnUpdateStats;
- PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
- PFN_UPDATE_STREAMOUT pfnUpdateStreamOut;
-
-
- // Global Stats
- SWR_STATS* pStats;
-
- // Scratch space for workers.
- uint8_t** ppScratch;
-
- volatile OSALIGNLINE(uint32_t) drawsOutstandingFE;
-
- OSALIGNLINE(CachingAllocator) cachingArenaAllocator;
- uint32_t frameCount;
-
- uint32_t lastFrameChecked;
- uint64_t lastDrawChecked;
- TileSet* pSingleThreadLockedTiles;
-
- // ArchRast thread contexts.
- HANDLE* pArContext;
-
- // handle to external memory for worker data to create memory contexts
- HANDLE hExternalMemory;
-
- BucketManager *pBucketMgr;
-};
-
-#define UPDATE_STAT_BE(name, count) \
- if (GetApiState(pDC).enableStatsBE) \
- { \
- pDC->dynState.pStats[workerId].name += count; \
- }
-#define UPDATE_STAT_FE(name, count) \
- if (GetApiState(pDC).enableStatsFE) \
- { \
- pDC->dynState.statsFE.name += count; \
- }
-
-// ArchRast instrumentation framework
-#define AR_WORKER_CTX pDC->pContext->pArContext[workerId]
-#define AR_API_CTX pDC->pContext->pArContext[pContext->NumWorkerThreads]
-
-#ifdef KNOB_ENABLE_RDTSC
-#define RDTSC_BEGIN(pBucketMgr, type, drawid) RDTSC_START(pBucketMgr, type)
-#define RDTSC_END(pBucketMgr, type, count) RDTSC_STOP(pBucketMgr, type, count, 0)
-#else
-#define RDTSC_BEGIN(pBucketMgr, type, drawid)
-#define RDTSC_END(pBucketMgr, type, count)
-#endif
-
-#ifdef KNOB_ENABLE_AR
-#define _AR_EVENT(ctx, event) ArchRast::Dispatch(ctx, ArchRast::event)
-#define _AR_FLUSH(ctx, id) ArchRast::FlushDraw(ctx, id)
-#else
-#define _AR_EVENT(ctx, event)
-#define _AR_FLUSH(ctx, id)
-#endif
-
-// Use these macros for api thread.
-#define AR_API_EVENT(event) _AR_EVENT(AR_API_CTX, event)
-
-// Use these macros for worker threads.
-#define AR_EVENT(event) _AR_EVENT(AR_WORKER_CTX, event)
-#define AR_FLUSH(id) _AR_FLUSH(AR_WORKER_CTX, id)
diff --git a/src/gallium/drivers/swr/rasterizer/core/depthstencil.h b/src/gallium/drivers/swr/rasterizer/core/depthstencil.h
deleted file mode 100644
index 54a3489205a..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/depthstencil.h
+++ /dev/null
@@ -1,335 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file depthstencil.h
- *
- * @brief Implements depth/stencil functionality
- *
- ******************************************************************************/
-#pragma once
-#include "common/os.h"
-#include "format_conversion.h"
-
-INLINE
-void StencilOp(SWR_STENCILOP op,
- simdscalar const& mask,
- simdscalar const& stencilRefps,
- simdscalar& stencilps)
-{
- simdscalari stencil = _simd_castps_si(stencilps);
-
- switch (op)
- {
- case STENCILOP_KEEP:
- break;
- case STENCILOP_ZERO:
- stencilps = _simd_blendv_ps(stencilps, _simd_setzero_ps(), mask);
- break;
- case STENCILOP_REPLACE:
- stencilps = _simd_blendv_ps(stencilps, stencilRefps, mask);
- break;
- case STENCILOP_INCRSAT:
- {
- simdscalari stencilincr = _simd_adds_epu8(stencil, _simd_set1_epi32(1));
- stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
- break;
- }
- case STENCILOP_DECRSAT:
- {
- simdscalari stencildecr = _simd_subs_epu8(stencil, _simd_set1_epi32(1));
- stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
- break;
- }
- case STENCILOP_INCR:
- {
- simdscalari stencilincr = _simd_add_epi8(stencil, _simd_set1_epi32(1));
- stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
- break;
- }
- case STENCILOP_DECR:
- {
- simdscalari stencildecr = _simd_add_epi8(stencil, _simd_set1_epi32((-1) & 0xff));
- stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
- break;
- }
- case STENCILOP_INVERT:
- {
- simdscalar stencilinvert =
- _simd_andnot_ps(stencilps, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps()));
- stencilps = _simd_blendv_ps(stencilps, stencilinvert, mask);
- break;
- }
- default:
- break;
- }
-}
-
-template <SWR_FORMAT depthFormatT>
-simdscalar QuantizeDepth(simdscalar const& depth)
-{
- SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0);
- uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0);
-
- if (depthType == SWR_TYPE_FLOAT)
- {
- // assume only 32bit float depth supported
- SWR_ASSERT(depthBpc == 32);
-
- // matches shader precision, no quantizing needed
- return depth;
- }
-
- // should be unorm depth if not float
- SWR_ASSERT(depthType == SWR_TYPE_UNORM);
-
- float quantize = (float)((1 << depthBpc) - 1);
- simdscalar result = _simd_mul_ps(depth, _simd_set1_ps(quantize));
- result = _simd_add_ps(result, _simd_set1_ps(0.5f));
- result = _simd_round_ps(result, _MM_FROUND_TO_ZERO);
-
- if (depthBpc > 16)
- {
- result = _simd_div_ps(result, _simd_set1_ps(quantize));
- }
- else
- {
- result = _simd_mul_ps(result, _simd_set1_ps(1.0f / quantize));
- }
-
- return result;
-}
-
-INLINE
-simdscalar DepthStencilTest(const API_STATE* pState,
- bool frontFacing,
- uint32_t viewportIndex,
- simdscalar const& iZ,
- uint8_t* pDepthBase,
- simdscalar const& coverageMask,
- uint8_t* pStencilBase,
- simdscalar* pStencilMask)
-{
- static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
- static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format");
-
- const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState;
- const SWR_VIEWPORT* pViewport = &pState->vp[viewportIndex];
-
- simdscalar depthResult = _simd_set1_ps(-1.0f);
- simdscalar zbuf;
-
- // clamp Z to viewport [minZ..maxZ]
- simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
- simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
- simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
-
- if (pDSState->depthTestEnable)
- {
- switch (pDSState->depthTestFunc)
- {
- case ZFUNC_NEVER:
- depthResult = _simd_setzero_ps();
- break;
- case ZFUNC_ALWAYS:
- break;
- default:
- zbuf = _simd_load_ps((const float*)pDepthBase);
- }
-
- switch (pDSState->depthTestFunc)
- {
- case ZFUNC_LE:
- depthResult = _simd_cmple_ps(interpZ, zbuf);
- break;
- case ZFUNC_LT:
- depthResult = _simd_cmplt_ps(interpZ, zbuf);
- break;
- case ZFUNC_GT:
- depthResult = _simd_cmpgt_ps(interpZ, zbuf);
- break;
- case ZFUNC_GE:
- depthResult = _simd_cmpge_ps(interpZ, zbuf);
- break;
- case ZFUNC_EQ:
- depthResult = _simd_cmpeq_ps(interpZ, zbuf);
- break;
- case ZFUNC_NE:
- depthResult = _simd_cmpneq_ps(interpZ, zbuf);
- break;
- }
- }
-
- simdscalar stencilMask = _simd_set1_ps(-1.0f);
-
- if (pDSState->stencilTestEnable)
- {
- uint8_t stencilRefValue;
- uint32_t stencilTestFunc;
- uint8_t stencilTestMask;
- if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
- {
- stencilRefValue = pDSState->stencilRefValue;
- stencilTestFunc = pDSState->stencilTestFunc;
- stencilTestMask = pDSState->stencilTestMask;
- }
- else
- {
- stencilRefValue = pDSState->backfaceStencilRefValue;
- stencilTestFunc = pDSState->backfaceStencilTestFunc;
- stencilTestMask = pDSState->backfaceStencilTestMask;
- }
-
- simdvector sbuf;
- simdscalar stencilWithMask;
- simdscalar stencilRef;
- switch (stencilTestFunc)
- {
- case ZFUNC_NEVER:
- stencilMask = _simd_setzero_ps();
- break;
- case ZFUNC_ALWAYS:
- break;
- default:
- LoadSOA<R8_UINT>(pStencilBase, sbuf);
-
- // apply stencil read mask
- stencilWithMask = _simd_castsi_ps(
- _simd_and_si(_simd_castps_si(sbuf.v[0]), _simd_set1_epi32(stencilTestMask)));
-
- // do stencil compare in float to avoid simd integer emulation in AVX1
- stencilWithMask = _simd_cvtepi32_ps(_simd_castps_si(stencilWithMask));
-
- stencilRef = _simd_set1_ps((float)(stencilRefValue & stencilTestMask));
- break;
- }
-
- switch (stencilTestFunc)
- {
- case ZFUNC_LE:
- stencilMask = _simd_cmple_ps(stencilRef, stencilWithMask);
- break;
- case ZFUNC_LT:
- stencilMask = _simd_cmplt_ps(stencilRef, stencilWithMask);
- break;
- case ZFUNC_GT:
- stencilMask = _simd_cmpgt_ps(stencilRef, stencilWithMask);
- break;
- case ZFUNC_GE:
- stencilMask = _simd_cmpge_ps(stencilRef, stencilWithMask);
- break;
- case ZFUNC_EQ:
- stencilMask = _simd_cmpeq_ps(stencilRef, stencilWithMask);
- break;
- case ZFUNC_NE:
- stencilMask = _simd_cmpneq_ps(stencilRef, stencilWithMask);
- break;
- }
- }
-
- simdscalar depthWriteMask = _simd_and_ps(depthResult, stencilMask);
- depthWriteMask = _simd_and_ps(depthWriteMask, coverageMask);
-
- *pStencilMask = stencilMask;
- return depthWriteMask;
-}
-
-INLINE
-void DepthStencilWrite(const SWR_VIEWPORT* pViewport,
- const SWR_DEPTH_STENCIL_STATE* pDSState,
- bool frontFacing,
- simdscalar const& iZ,
- uint8_t* pDepthBase,
- const simdscalar& depthMask,
- const simdscalar& coverageMask,
- uint8_t* pStencilBase,
- const simdscalar& stencilMask)
-{
- if (pDSState->depthWriteEnable)
- {
- // clamp Z to viewport [minZ..maxZ]
- simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
- simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
- simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
-
- simdscalar vMask = _simd_and_ps(depthMask, coverageMask);
- _simd_maskstore_ps((float*)pDepthBase, _simd_castps_si(vMask), interpZ);
- }
-
- if (pDSState->stencilWriteEnable)
- {
- simdvector sbuf;
- LoadSOA<R8_UINT>(pStencilBase, sbuf);
- simdscalar stencilbuf = sbuf.v[0];
-
- uint8_t stencilRefValue;
- uint32_t stencilFailOp;
- uint32_t stencilPassDepthPassOp;
- uint32_t stencilPassDepthFailOp;
- uint8_t stencilWriteMask;
- if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
- {
- stencilRefValue = pDSState->stencilRefValue;
- stencilFailOp = pDSState->stencilFailOp;
- stencilPassDepthPassOp = pDSState->stencilPassDepthPassOp;
- stencilPassDepthFailOp = pDSState->stencilPassDepthFailOp;
- stencilWriteMask = pDSState->stencilWriteMask;
- }
- else
- {
- stencilRefValue = pDSState->backfaceStencilRefValue;
- stencilFailOp = pDSState->backfaceStencilFailOp;
- stencilPassDepthPassOp = pDSState->backfaceStencilPassDepthPassOp;
- stencilPassDepthFailOp = pDSState->backfaceStencilPassDepthFailOp;
- stencilWriteMask = pDSState->backfaceStencilWriteMask;
- }
-
- simdscalar stencilps = stencilbuf;
- simdscalar stencilRefps = _simd_castsi_ps(_simd_set1_epi32(stencilRefValue));
-
- simdscalar stencilFailMask = _simd_andnot_ps(stencilMask, coverageMask);
- simdscalar stencilPassDepthPassMask = _simd_and_ps(stencilMask, depthMask);
- simdscalar stencilPassDepthFailMask =
- _simd_and_ps(stencilMask, _simd_andnot_ps(depthMask, _simd_set1_ps(-1)));
-
- simdscalar origStencil = stencilps;
-
- StencilOp((SWR_STENCILOP)stencilFailOp, stencilFailMask, stencilRefps, stencilps);
- StencilOp((SWR_STENCILOP)stencilPassDepthFailOp,
- stencilPassDepthFailMask,
- stencilRefps,
- stencilps);
- StencilOp((SWR_STENCILOP)stencilPassDepthPassOp,
- stencilPassDepthPassMask,
- stencilRefps,
- stencilps);
-
- // apply stencil write mask
- simdscalari vWriteMask = _simd_set1_epi32(stencilWriteMask);
- stencilps = _simd_and_ps(stencilps, _simd_castsi_ps(vWriteMask));
- stencilps =
- _simd_or_ps(_simd_andnot_ps(_simd_castsi_ps(vWriteMask), origStencil), stencilps);
-
- simdvector stencilResult;
- stencilResult.v[0] = _simd_blendv_ps(origStencil, stencilps, coverageMask);
- StoreSOA<R8_UINT>(stencilResult, pStencilBase);
- }
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/fifo.hpp b/src/gallium/drivers/swr/rasterizer/core/fifo.hpp
deleted file mode 100644
index 9a9cc2635df..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/fifo.hpp
+++ /dev/null
@@ -1,138 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file fifo.hpp
- *
- * @brief Definitions for our fifos used for thread communication.
- *
- ******************************************************************************/
-#pragma once
-
-#include "common/os.h"
-#include "arena.h"
-
-#include <vector>
-#include <cassert>
-
-template <class T>
-struct QUEUE
-{
- OSALIGNLINE(volatile uint32_t) mLock{0};
- OSALIGNLINE(volatile uint32_t) mNumEntries{0};
- std::vector<T*> mBlocks;
- T* mCurBlock{nullptr};
- uint32_t mHead{0};
- uint32_t mTail{0};
- uint32_t mCurBlockIdx{0};
-
- // power of 2
- static const uint32_t mBlockSizeShift = 6;
- static const uint32_t mBlockSize = 1 << mBlockSizeShift;
-
- template <typename ArenaT>
- void clear(ArenaT& arena)
- {
- mHead = 0;
- mTail = 0;
- mBlocks.clear();
- T* pNewBlock = (T*)arena.AllocAligned(sizeof(T) * mBlockSize, KNOB_SIMD_WIDTH * 4);
- mBlocks.push_back(pNewBlock);
- mCurBlock = pNewBlock;
- mCurBlockIdx = 0;
- mNumEntries = 0;
- mLock = 0;
- }
-
- uint32_t getNumQueued() { return mNumEntries; }
-
- bool tryLock()
- {
- if (mLock)
- {
- return false;
- }
-
- // try to lock the FIFO
- long initial = InterlockedCompareExchange(&mLock, 1, 0);
- return (initial == 0);
- }
-
- void unlock() { mLock = 0; }
-
- T* peek()
- {
- if (mNumEntries == 0)
- {
- return nullptr;
- }
- uint32_t block = mHead >> mBlockSizeShift;
- return &mBlocks[block][mHead & (mBlockSize - 1)];
- }
-
- void dequeue_noinc()
- {
- mHead++;
- mNumEntries--;
- }
-
- template <typename ArenaT>
- bool enqueue_try_nosync(ArenaT& arena, const T* entry)
- {
- const float* pSrc = (const float*)entry;
- float* pDst = (float*)&mCurBlock[mTail];
-
- auto lambda = [&](int32_t i) {
- __m256 vSrc = _mm256_load_ps(pSrc + i * KNOB_SIMD_WIDTH);
- _mm256_stream_ps(pDst + i * KNOB_SIMD_WIDTH, vSrc);
- };
-
- const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH * 4);
- static_assert(numSimdLines * KNOB_SIMD_WIDTH * 4 == sizeof(T),
- "FIFO element size should be multiple of SIMD width.");
-
- UnrollerL<0, numSimdLines, 1>::step(lambda);
-
- mTail++;
- if (mTail == mBlockSize)
- {
- if (++mCurBlockIdx < mBlocks.size())
- {
- mCurBlock = mBlocks[mCurBlockIdx];
- }
- else
- {
- T* newBlock = (T*)arena.AllocAligned(sizeof(T) * mBlockSize, KNOB_SIMD_WIDTH * 4);
- SWR_ASSERT(newBlock);
-
- mBlocks.push_back(newBlock);
- mCurBlock = newBlock;
- }
-
- mTail = 0;
- }
-
- mNumEntries++;
- return true;
- }
-
- void destroy() {}
-};
diff --git a/src/gallium/drivers/swr/rasterizer/core/format_conversion.h b/src/gallium/drivers/swr/rasterizer/core/format_conversion.h
deleted file mode 100644
index f1ea06c4978..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/format_conversion.h
+++ /dev/null
@@ -1,262 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file format_conversion.h
- *
- * @brief API implementation
- *
- ******************************************************************************/
-#include "format_types.h"
-#include "format_traits.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Load SIMD packed pixels in SOA format and converts to
-/// SOA RGBA32_FLOAT format.
-/// @param pSrc - source data in SOA form
-/// @param dst - output data in SOA form
-template <typename SIMD_T, SWR_FORMAT SrcFormat>
-INLINE void SIMDCALL LoadSOA(const uint8_t* pSrc, Vec4<SIMD_T>& dst)
-{
- // fast path for float32
- if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) &&
- (FormatTraits<SrcFormat>::GetBPC(0) == 32))
- {
- auto lambda = [&](int comp)
- {
- Float<SIMD_T> vComp =
- SIMD_T::load_ps(reinterpret_cast<const float*>(pSrc + comp * sizeof(Float<SIMD_T>)));
-
- dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
- };
-
- UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
- return;
- }
-
- auto lambda = [&](int comp)
- {
- // load SIMD components
- Float<SIMD_T> vComp;
- FormatTraits<SrcFormat>::loadSOA(comp, pSrc, vComp);
-
- // unpack
- vComp = FormatTraits<SrcFormat>::unpack(comp, vComp);
-
- // convert
- if (FormatTraits<SrcFormat>::isNormalized(comp))
- {
- vComp = SIMD_T::cvtepi32_ps(SIMD_T::castps_si(vComp));
- vComp = SIMD_T::mul_ps(vComp, SIMD_T::set1_ps(FormatTraits<SrcFormat>::toFloat(comp)));
- }
-
- dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
-
- // is there a better way to get this from the SIMD traits?
- const uint32_t SIMD_WIDTH = sizeof(typename SIMD_T::Float) / sizeof(float);
-
- pSrc += (FormatTraits<SrcFormat>::GetBPC(comp) * SIMD_WIDTH) / 8;
- };
-
- UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
-}
-
-template <SWR_FORMAT SrcFormat>
-INLINE void SIMDCALL LoadSOA(const uint8_t* pSrc, simdvector& dst)
-{
- LoadSOA<SIMD256, SrcFormat>(pSrc, dst);
-}
-
-template <SWR_FORMAT SrcFormat>
-INLINE void SIMDCALL LoadSOA(const uint8_t* pSrc, simd16vector& dst)
-{
- LoadSOA<SIMD512, SrcFormat>(pSrc, dst);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Clamps the given component based on the requirements on the
-/// Format template arg
-/// @param vComp - SIMD vector of floats
-/// @param Component - component
-template <typename SIMD_T, SWR_FORMAT Format>
-INLINE Float<SIMD_T> SIMDCALL Clamp(Float<SIMD_T> const& v, uint32_t Component)
-{
- Float<SIMD_T> vComp = v;
- if (Component >= 4 || Component < 0)
- {
- // Component shouldn't out of <0;3> range
- assert(false);
- return vComp;
- }
- if (FormatTraits<Format>::isNormalized(Component))
- {
- if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UNORM)
- {
- vComp = SIMD_T::max_ps(vComp, SIMD_T::setzero_ps());
- }
-
- if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SNORM)
- {
- vComp = SIMD_T::max_ps(vComp, SIMD_T::set1_ps(-1.0f));
- }
- vComp = SIMD_T::min_ps(vComp, SIMD_T::set1_ps(1.0f));
- }
- else if (FormatTraits<Format>::GetBPC(Component) < 32)
- {
- if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT)
- {
- int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
- int iMin = 0;
- Integer<SIMD_T> vCompi = SIMD_T::castps_si(vComp);
- vCompi = SIMD_T::max_epu32(vCompi, SIMD_T::set1_epi32(iMin));
- vCompi = SIMD_T::min_epu32(vCompi, SIMD_T::set1_epi32(iMax));
- vComp = SIMD_T::castsi_ps(vCompi);
- }
- else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT)
- {
- int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
- int iMin = -1 - iMax;
- Integer<SIMD_T> vCompi = SIMD_T::castps_si(vComp);
- vCompi = SIMD_T::max_epi32(vCompi, SIMD_T::set1_epi32(iMin));
- vCompi = SIMD_T::min_epi32(vCompi, SIMD_T::set1_epi32(iMax));
- vComp = SIMD_T::castsi_ps(vCompi);
- }
- }
-
- return vComp;
-}
-
-template <SWR_FORMAT Format>
-INLINE simdscalar SIMDCALL Clamp(simdscalar const& v, uint32_t Component)
-{
- return Clamp<SIMD256, Format>(v, Component);
-}
-
-template <SWR_FORMAT Format>
-INLINE simd16scalar SIMDCALL Clamp(simd16scalar const& v, uint32_t Component)
-{
- return Clamp<SIMD512, Format>(v, Component);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Normalize the given component based on the requirements on the
-/// Format template arg
-/// @param vComp - SIMD vector of floats
-/// @param Component - component
-template <typename SIMD_T, SWR_FORMAT Format>
-INLINE Float<SIMD_T> SIMDCALL Normalize(Float<SIMD_T> const& vComp, uint32_t Component)
-{
- Float<SIMD_T> r = vComp;
- if (FormatTraits<Format>::isNormalized(Component))
- {
- r = SIMD_T::mul_ps(r, SIMD_T::set1_ps(FormatTraits<Format>::fromFloat(Component)));
- r = SIMD_T::castsi_ps(SIMD_T::cvtps_epi32(r));
- }
- return r;
-}
-
-template <SWR_FORMAT Format>
-INLINE simdscalar SIMDCALL Normalize(simdscalar const& vComp, uint32_t Component)
-{
- return Normalize<SIMD256, Format>(vComp, Component);
-}
-
-template <SWR_FORMAT Format>
-INLINE simd16scalar SIMDCALL Normalize(simd16scalar const& vComp, uint32_t Component)
-{
- return Normalize<SIMD512, Format>(vComp, Component);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Convert and store simdvector of pixels in SOA
-/// RGBA32_FLOAT to SOA format
-/// @param src - source data in SOA form
-/// @param dst - output data in SOA form
-template <typename SIMD_T, SWR_FORMAT DstFormat>
-INLINE void SIMDCALL StoreSOA(const Vec4<SIMD_T>& src, uint8_t* pDst)
-{
- // fast path for float32
- if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) &&
- (FormatTraits<DstFormat>::GetBPC(0) == 32))
- {
- for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
- {
- Float<SIMD_T> vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
-
- // Gamma-correct
- if (FormatTraits<DstFormat>::isSRGB)
- {
- if (comp < 3) // Input format is always RGBA32_FLOAT.
- {
- vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
- }
- }
-
- SIMD_T::store_ps(reinterpret_cast<float*>(pDst + comp * sizeof(simd16scalar)), vComp);
- }
- return;
- }
-
- auto lambda = [&](int comp) {
- Float<SIMD_T> vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
-
- // Gamma-correct
- if (FormatTraits<DstFormat>::isSRGB)
- {
- if (comp < 3) // Input format is always RGBA32_FLOAT.
- {
- vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
- }
- }
-
- // clamp
- vComp = Clamp<SIMD_T, DstFormat>(vComp, comp);
-
- // normalize
- vComp = Normalize<SIMD_T, DstFormat>(vComp, comp);
-
- // pack
- vComp = FormatTraits<DstFormat>::pack(comp, vComp);
-
- // store
- FormatTraits<DstFormat>::storeSOA(comp, pDst, vComp);
-
- // is there a better way to get this from the SIMD traits?
- const uint32_t SIMD_WIDTH = sizeof(typename SIMD_T::Float) / sizeof(float);
-
- pDst += (FormatTraits<DstFormat>::GetBPC(comp) * SIMD_WIDTH) / 8;
- };
-
- UnrollerL<0, FormatTraits<DstFormat>::numComps, 1>::step(lambda);
-}
-
-template <SWR_FORMAT DstFormat>
-INLINE void SIMDCALL StoreSOA(const simdvector& src, uint8_t* pDst)
-{
- StoreSOA<SIMD256, DstFormat>(src, pDst);
-}
-
-template <SWR_FORMAT DstFormat>
-INLINE void SIMDCALL StoreSOA(const simd16vector& src, uint8_t* pDst)
-{
- StoreSOA<SIMD512, DstFormat>(src, pDst);
-}
-
diff --git a/src/gallium/drivers/swr/rasterizer/core/format_traits.h b/src/gallium/drivers/swr/rasterizer/core/format_traits.h
deleted file mode 100644
index 97e7d56e48e..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/format_traits.h
+++ /dev/null
@@ -1,4046 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file format_traits.h
- *
- * @brief Format Traits. auto-generated file
- *
- * DO NOT EDIT
- *
- ******************************************************************************/
-#pragma once
-
-#include "format_types.h"
-#include "format_utils.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatSwizzle - Component swizzle selects
-//////////////////////////////////////////////////////////////////////////
-template <uint32_t comp0 = 0, uint32_t comp1 = 0, uint32_t comp2 = 0, uint32_t comp3 = 0>
-struct FormatSwizzle
-{
- // Return swizzle select for component.
- INLINE static uint32_t swizzle(uint32_t c)
- {
- static const uint32_t s[4] = {comp0, comp1, comp2, comp3};
- return s[c];
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits - Format traits
-//////////////////////////////////////////////////////////////////////////
-template <SWR_FORMAT format>
-struct FormatTraits : ComponentTraits<SWR_TYPE_UNKNOWN, 0>, FormatSwizzle<0>, Defaults<0, 0, 0, 0>
-{
- static const uint32_t bpp{0};
- static const uint32_t numComps{0};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
-
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32B32A32_FLOAT> - Format traits specialization for R32G32B32A32_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32B32A32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
- 32,
- SWR_TYPE_FLOAT,
- 32,
- SWR_TYPE_FLOAT,
- 32,
- SWR_TYPE_FLOAT,
- 32>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32_32_32 TransposeT;
- typedef Format4<32, 32, 32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32B32A32_SINT> - Format traits specialization for R32G32B32A32_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32B32A32_SINT>
- : ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32_32_32 TransposeT;
- typedef Format4<32, 32, 32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32B32A32_UINT> - Format traits specialization for R32G32B32A32_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32B32A32_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32_32_32 TransposeT;
- typedef Format4<32, 32, 32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R64G64_FLOAT> - Format traits specialization for R64G64_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R64G64_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose64_64 TransposeT;
- typedef Format2<64, 64> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32B32X32_FLOAT> - Format traits specialization for R32G32B32X32_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32B32X32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
- 32,
- SWR_TYPE_FLOAT,
- 32,
- SWR_TYPE_FLOAT,
- 32,
- SWR_TYPE_UNUSED,
- 32>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32_32_32 TransposeT;
- typedef Format4<32, 32, 32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32B32A32_SSCALED> - Format traits specialization for R32G32B32A32_SSCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32B32A32_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
- 32,
- SWR_TYPE_SSCALED,
- 32,
- SWR_TYPE_SSCALED,
- 32,
- SWR_TYPE_SSCALED,
- 32>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32_32_32 TransposeT;
- typedef Format4<32, 32, 32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32B32A32_USCALED> - Format traits specialization for R32G32B32A32_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32B32A32_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
- 32,
- SWR_TYPE_USCALED,
- 32,
- SWR_TYPE_USCALED,
- 32,
- SWR_TYPE_USCALED,
- 32>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32_32_32 TransposeT;
- typedef Format4<32, 32, 32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32B32A32_SFIXED> - Format traits specialization for R32G32B32A32_SFIXED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32B32A32_SFIXED> : ComponentTraits<SWR_TYPE_SFIXED,
- 32,
- SWR_TYPE_SFIXED,
- 32,
- SWR_TYPE_SFIXED,
- 32,
- SWR_TYPE_SFIXED,
- 32>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32_32_32 TransposeT;
- typedef Format4<32, 32, 32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32B32_FLOAT> - Format traits specialization for R32G32B32_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32B32_FLOAT>
- : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{96};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32_32 TransposeT;
- typedef Format3<32, 32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32B32_SINT> - Format traits specialization for R32G32B32_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32B32_SINT>
- : ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{96};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32_32 TransposeT;
- typedef Format3<32, 32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32B32_UINT> - Format traits specialization for R32G32B32_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32B32_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{96};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32_32 TransposeT;
- typedef Format3<32, 32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32B32_SSCALED> - Format traits specialization for R32G32B32_SSCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32B32_SSCALED>
- : ComponentTraits<SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{96};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32_32 TransposeT;
- typedef Format3<32, 32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32B32_USCALED> - Format traits specialization for R32G32B32_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32B32_USCALED>
- : ComponentTraits<SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{96};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32_32 TransposeT;
- typedef Format3<32, 32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32B32_SFIXED> - Format traits specialization for R32G32B32_SFIXED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32B32_SFIXED>
- : ComponentTraits<SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{96};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32_32 TransposeT;
- typedef Format3<32, 32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16A16_UNORM> - Format traits specialization for R16G16B16A16_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16A16_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
- 16,
- SWR_TYPE_UNORM,
- 16,
- SWR_TYPE_UNORM,
- 16,
- SWR_TYPE_UNORM,
- 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16_16 TransposeT;
- typedef Format4<16, 16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16A16_SNORM> - Format traits specialization for R16G16B16A16_SNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16A16_SNORM> : ComponentTraits<SWR_TYPE_SNORM,
- 16,
- SWR_TYPE_SNORM,
- 16,
- SWR_TYPE_SNORM,
- 16,
- SWR_TYPE_SNORM,
- 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16_16 TransposeT;
- typedef Format4<16, 16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16A16_SINT> - Format traits specialization for R16G16B16A16_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16A16_SINT>
- : ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16_16 TransposeT;
- typedef Format4<16, 16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16A16_UINT> - Format traits specialization for R16G16B16A16_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16A16_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16_16 TransposeT;
- typedef Format4<16, 16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16A16_FLOAT> - Format traits specialization for R16G16B16A16_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16A16_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
- 16,
- SWR_TYPE_FLOAT,
- 16,
- SWR_TYPE_FLOAT,
- 16,
- SWR_TYPE_FLOAT,
- 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16_16 TransposeT;
- typedef Format4<16, 16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32_FLOAT> - Format traits specialization for R32G32_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32 TransposeT;
- typedef Format2<32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32_SINT> - Format traits specialization for R32G32_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32_SINT> : ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32 TransposeT;
- typedef Format2<32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32_UINT> - Format traits specialization for R32G32_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32_UINT> : ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32 TransposeT;
- typedef Format2<32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32_FLOAT_X8X24_TYPELESS> - Format traits specialization for
-/// R32_FLOAT_X8X24_TYPELESS
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32_FLOAT_X8X24_TYPELESS>
- : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_UNUSED, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32 TransposeT;
- typedef Format2<32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<X32_TYPELESS_G8X24_UINT> - Format traits specialization for X32_TYPELESS_G8X24_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<X32_TYPELESS_G8X24_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UNUSED, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32 TransposeT;
- typedef Format2<32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L32A32_FLOAT> - Format traits specialization for L32A32_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L32A32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{2};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{1};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32 TransposeT;
- typedef Format2<32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R64_FLOAT> - Format traits specialization for R64_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R64_FLOAT>
- : ComponentTraits<SWR_TYPE_FLOAT, 64>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<64> TransposeT;
- typedef Format1<64> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16X16_UNORM> - Format traits specialization for R16G16B16X16_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16X16_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
- 16,
- SWR_TYPE_UNORM,
- 16,
- SWR_TYPE_UNORM,
- 16,
- SWR_TYPE_UNUSED,
- 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16_16 TransposeT;
- typedef Format4<16, 16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16X16_FLOAT> - Format traits specialization for R16G16B16X16_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16X16_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
- 16,
- SWR_TYPE_FLOAT,
- 16,
- SWR_TYPE_FLOAT,
- 16,
- SWR_TYPE_UNUSED,
- 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16_16 TransposeT;
- typedef Format4<16, 16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L32X32_FLOAT> - Format traits specialization for L32X32_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L32X32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32 TransposeT;
- typedef Format2<32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<I32X32_FLOAT> - Format traits specialization for I32X32_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<I32X32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32 TransposeT;
- typedef Format2<32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16A16_SSCALED> - Format traits specialization for R16G16B16A16_SSCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16A16_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
- 16,
- SWR_TYPE_SSCALED,
- 16,
- SWR_TYPE_SSCALED,
- 16,
- SWR_TYPE_SSCALED,
- 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16_16 TransposeT;
- typedef Format4<16, 16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16A16_USCALED> - Format traits specialization for R16G16B16A16_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16A16_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
- 16,
- SWR_TYPE_USCALED,
- 16,
- SWR_TYPE_USCALED,
- 16,
- SWR_TYPE_USCALED,
- 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16_16 TransposeT;
- typedef Format4<16, 16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32_SSCALED> - Format traits specialization for R32G32_SSCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32 TransposeT;
- typedef Format2<32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32_USCALED> - Format traits specialization for R32G32_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32_USCALED> : ComponentTraits<SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32 TransposeT;
- typedef Format2<32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32G32_SFIXED> - Format traits specialization for R32G32_SFIXED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32G32_SFIXED> : ComponentTraits<SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose32_32 TransposeT;
- typedef Format2<32, 32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B8G8R8A8_UNORM> - Format traits specialization for B8G8R8A8_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B8G8R8A8_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B8G8R8A8_UNORM_SRGB> - Format traits specialization for B8G8R8A8_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B8G8R8A8_UNORM_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{true};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R10G10B10A2_UNORM> - Format traits specialization for R10G10B10A2_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R10G10B10A2_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNORM,
- 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R10G10B10A2_UNORM_SRGB> - Format traits specialization for R10G10B10A2_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R10G10B10A2_UNORM_SRGB> : ComponentTraits<SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNORM,
- 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{true};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R10G10B10A2_UINT> - Format traits specialization for R10G10B10A2_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R10G10B10A2_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8A8_UNORM> - Format traits specialization for R8G8B8A8_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8A8_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8A8_UNORM_SRGB> - Format traits specialization for R8G8B8A8_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8A8_UNORM_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{true};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8A8_SNORM> - Format traits specialization for R8G8B8A8_SNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8A8_SNORM>
- : ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8A8_SINT> - Format traits specialization for R8G8B8A8_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8A8_SINT>
- : ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8A8_UINT> - Format traits specialization for R8G8B8A8_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8A8_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16_UNORM> - Format traits specialization for R16G16_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16 TransposeT;
- typedef Format2<16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16_SNORM> - Format traits specialization for R16G16_SNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16_SNORM> : ComponentTraits<SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16 TransposeT;
- typedef Format2<16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16_SINT> - Format traits specialization for R16G16_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16_SINT> : ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16 TransposeT;
- typedef Format2<16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16_UINT> - Format traits specialization for R16G16_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16_UINT> : ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16 TransposeT;
- typedef Format2<16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16_FLOAT> - Format traits specialization for R16G16_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16 TransposeT;
- typedef Format2<16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B10G10R10A2_UNORM> - Format traits specialization for B10G10R10A2_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B10G10R10A2_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNORM,
- 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B10G10R10A2_UNORM_SRGB> - Format traits specialization for B10G10R10A2_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B10G10R10A2_UNORM_SRGB> : ComponentTraits<SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNORM,
- 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{true};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R11G11B10_FLOAT> - Format traits specialization for R11G11B10_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R11G11B10_FLOAT>
- : ComponentTraits<SWR_TYPE_FLOAT, 11, SWR_TYPE_FLOAT, 11, SWR_TYPE_FLOAT, 10>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose11_11_10 TransposeT;
- typedef Format3<11, 11, 10> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R10G10B10_FLOAT_A2_UNORM> - Format traits specialization for
-/// R10G10B10_FLOAT_A2_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R10G10B10_FLOAT_A2_UNORM> : ComponentTraits<SWR_TYPE_FLOAT,
- 10,
- SWR_TYPE_FLOAT,
- 10,
- SWR_TYPE_FLOAT,
- 10,
- SWR_TYPE_UNORM,
- 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32_SINT> - Format traits specialization for R32_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32_SINT>
- : ComponentTraits<SWR_TYPE_SINT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<32> TransposeT;
- typedef Format1<32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32_UINT> - Format traits specialization for R32_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<32> TransposeT;
- typedef Format1<32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32_FLOAT> - Format traits specialization for R32_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32_FLOAT>
- : ComponentTraits<SWR_TYPE_FLOAT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<32> TransposeT;
- typedef Format1<32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R24_UNORM_X8_TYPELESS> - Format traits specialization for R24_UNORM_X8_TYPELESS
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R24_UNORM_X8_TYPELESS>
- : ComponentTraits<SWR_TYPE_UNORM, 24>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<32> TransposeT;
- typedef Format1<24> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<X24_TYPELESS_G8_UINT> - Format traits specialization for X24_TYPELESS_G8_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<X24_TYPELESS_G8_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 32>, FormatSwizzle<1>, Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<32> TransposeT;
- typedef Format1<32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L32_UNORM> - Format traits specialization for L32_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L32_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<32> TransposeT;
- typedef Format1<32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L16A16_UNORM> - Format traits specialization for L16A16_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L16A16_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{2};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{1};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16 TransposeT;
- typedef Format2<16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<I24X8_UNORM> - Format traits specialization for I24X8_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<I24X8_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 24, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose24_8 TransposeT;
- typedef Format2<24, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L24X8_UNORM> - Format traits specialization for L24X8_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L24X8_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 24, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose24_8 TransposeT;
- typedef Format2<24, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<I32_FLOAT> - Format traits specialization for I32_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<I32_FLOAT>
- : ComponentTraits<SWR_TYPE_FLOAT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<32> TransposeT;
- typedef Format1<32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L32_FLOAT> - Format traits specialization for L32_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L32_FLOAT>
- : ComponentTraits<SWR_TYPE_FLOAT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<32> TransposeT;
- typedef Format1<32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<A32_FLOAT> - Format traits specialization for A32_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<A32_FLOAT>
- : ComponentTraits<SWR_TYPE_FLOAT, 32>, FormatSwizzle<3>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<32> TransposeT;
- typedef Format1<32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B8G8R8X8_UNORM> - Format traits specialization for B8G8R8X8_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B8G8R8X8_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B8G8R8X8_UNORM_SRGB> - Format traits specialization for B8G8R8X8_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B8G8R8X8_UNORM_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{true};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8X8_UNORM> - Format traits specialization for R8G8B8X8_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8X8_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8X8_UNORM_SRGB> - Format traits specialization for R8G8B8X8_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8X8_UNORM_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{true};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R9G9B9E5_SHAREDEXP> - Format traits specialization for R9G9B9E5_SHAREDEXP
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R9G9B9E5_SHAREDEXP>
- : ComponentTraits<SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 5>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose9_9_9_5 TransposeT;
- typedef Format4<9, 9, 9, 5> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B10G10R10X2_UNORM> - Format traits specialization for B10G10R10X2_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B10G10R10X2_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNORM,
- 10,
- SWR_TYPE_UNUSED,
- 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L16A16_FLOAT> - Format traits specialization for L16A16_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L16A16_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{2};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{1};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16 TransposeT;
- typedef Format2<16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R10G10B10X2_USCALED> - Format traits specialization for R10G10B10X2_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R10G10B10X2_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
- 10,
- SWR_TYPE_USCALED,
- 10,
- SWR_TYPE_USCALED,
- 10,
- SWR_TYPE_UNUSED,
- 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8A8_SSCALED> - Format traits specialization for R8G8B8A8_SSCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8A8_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
- 8,
- SWR_TYPE_SSCALED,
- 8,
- SWR_TYPE_SSCALED,
- 8,
- SWR_TYPE_SSCALED,
- 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8A8_USCALED> - Format traits specialization for R8G8B8A8_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8A8_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
- 8,
- SWR_TYPE_USCALED,
- 8,
- SWR_TYPE_USCALED,
- 8,
- SWR_TYPE_USCALED,
- 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16_SSCALED> - Format traits specialization for R16G16_SSCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16 TransposeT;
- typedef Format2<16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16_USCALED> - Format traits specialization for R16G16_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16_USCALED> : ComponentTraits<SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16 TransposeT;
- typedef Format2<16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32_SSCALED> - Format traits specialization for R32_SSCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32_SSCALED>
- : ComponentTraits<SWR_TYPE_SSCALED, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<32> TransposeT;
- typedef Format1<32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32_USCALED> - Format traits specialization for R32_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32_USCALED>
- : ComponentTraits<SWR_TYPE_USCALED, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<32> TransposeT;
- typedef Format1<32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B5G6R5_UNORM> - Format traits specialization for B5G6R5_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B5G6R5_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 6, SWR_TYPE_UNORM, 5>,
- FormatSwizzle<2, 1, 0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose5_6_5 TransposeT;
- typedef Format3<5, 6, 5> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B5G6R5_UNORM_SRGB> - Format traits specialization for B5G6R5_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B5G6R5_UNORM_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 6, SWR_TYPE_UNORM, 5>,
- FormatSwizzle<2, 1, 0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{true};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose5_6_5 TransposeT;
- typedef Format3<5, 6, 5> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B5G5R5A1_UNORM> - Format traits specialization for B5G5R5A1_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B5G5R5A1_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 1>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose5_5_5_1 TransposeT;
- typedef Format4<5, 5, 5, 1> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B5G5R5A1_UNORM_SRGB> - Format traits specialization for B5G5R5A1_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B5G5R5A1_UNORM_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 1>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{true};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose5_5_5_1 TransposeT;
- typedef Format4<5, 5, 5, 1> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B4G4R4A4_UNORM> - Format traits specialization for B4G4R4A4_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B4G4R4A4_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose4_4_4_4 TransposeT;
- typedef Format4<4, 4, 4, 4> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B4G4R4A4_UNORM_SRGB> - Format traits specialization for B4G4R4A4_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B4G4R4A4_UNORM_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{true};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose4_4_4_4 TransposeT;
- typedef Format4<4, 4, 4, 4> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8_UNORM> - Format traits specialization for R8G8_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8 TransposeT;
- typedef Format2<8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8_SNORM> - Format traits specialization for R8G8_SNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8_SNORM> : ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8 TransposeT;
- typedef Format2<8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8_SINT> - Format traits specialization for R8G8_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8_SINT> : ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8 TransposeT;
- typedef Format2<8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8_UINT> - Format traits specialization for R8G8_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8_UINT> : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8 TransposeT;
- typedef Format2<8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16_UNORM> - Format traits specialization for R16_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<16> TransposeT;
- typedef Format1<16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16_SNORM> - Format traits specialization for R16_SNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16_SNORM>
- : ComponentTraits<SWR_TYPE_SNORM, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<16> TransposeT;
- typedef Format1<16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16_SINT> - Format traits specialization for R16_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16_SINT>
- : ComponentTraits<SWR_TYPE_SINT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<16> TransposeT;
- typedef Format1<16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16_UINT> - Format traits specialization for R16_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<16> TransposeT;
- typedef Format1<16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16_FLOAT> - Format traits specialization for R16_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16_FLOAT>
- : ComponentTraits<SWR_TYPE_FLOAT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<16> TransposeT;
- typedef Format1<16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<I16_UNORM> - Format traits specialization for I16_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<I16_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<16> TransposeT;
- typedef Format1<16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L16_UNORM> - Format traits specialization for L16_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L16_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<16> TransposeT;
- typedef Format1<16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<A16_UNORM> - Format traits specialization for A16_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<A16_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 16>, FormatSwizzle<3>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<16> TransposeT;
- typedef Format1<16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L8A8_UNORM> - Format traits specialization for L8A8_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L8A8_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{2};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{1};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8 TransposeT;
- typedef Format2<8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<I16_FLOAT> - Format traits specialization for I16_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<I16_FLOAT>
- : ComponentTraits<SWR_TYPE_FLOAT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<16> TransposeT;
- typedef Format1<16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L16_FLOAT> - Format traits specialization for L16_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L16_FLOAT>
- : ComponentTraits<SWR_TYPE_FLOAT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<16> TransposeT;
- typedef Format1<16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<A16_FLOAT> - Format traits specialization for A16_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<A16_FLOAT>
- : ComponentTraits<SWR_TYPE_FLOAT, 16>, FormatSwizzle<3>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<16> TransposeT;
- typedef Format1<16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L8A8_UNORM_SRGB> - Format traits specialization for L8A8_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L8A8_UNORM_SRGB> : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{2};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{1};
- static const bool isSRGB{true};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8 TransposeT;
- typedef Format2<8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B5G5R5X1_UNORM> - Format traits specialization for B5G5R5X1_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B5G5R5X1_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNUSED, 1>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose5_5_5_1 TransposeT;
- typedef Format4<5, 5, 5, 1> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B5G5R5X1_UNORM_SRGB> - Format traits specialization for B5G5R5X1_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B5G5R5X1_UNORM_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNUSED, 1>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{true};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose5_5_5_1 TransposeT;
- typedef Format4<5, 5, 5, 1> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8_SSCALED> - Format traits specialization for R8G8_SSCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8 TransposeT;
- typedef Format2<8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8_USCALED> - Format traits specialization for R8G8_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8_USCALED> : ComponentTraits<SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{2};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8 TransposeT;
- typedef Format2<8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16_SSCALED> - Format traits specialization for R16_SSCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16_SSCALED>
- : ComponentTraits<SWR_TYPE_SSCALED, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<16> TransposeT;
- typedef Format1<16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16_USCALED> - Format traits specialization for R16_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16_USCALED>
- : ComponentTraits<SWR_TYPE_USCALED, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<16> TransposeT;
- typedef Format1<16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<A1B5G5R5_UNORM> - Format traits specialization for A1B5G5R5_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<A1B5G5R5_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 1, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5>,
- FormatSwizzle<3, 2, 1, 0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose1_5_5_5 TransposeT;
- typedef Format4<1, 5, 5, 5> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<A4B4G4R4_UNORM> - Format traits specialization for A4B4G4R4_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<A4B4G4R4_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
- FormatSwizzle<3, 2, 1, 0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose4_4_4_4 TransposeT;
- typedef Format4<4, 4, 4, 4> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L8A8_UINT> - Format traits specialization for L8A8_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L8A8_UINT> : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{2};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{1};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8 TransposeT;
- typedef Format2<8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L8A8_SINT> - Format traits specialization for L8A8_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L8A8_SINT> : ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{16};
- static const uint32_t numComps{2};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{1};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8 TransposeT;
- typedef Format2<8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8_UNORM> - Format traits specialization for R8_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8_SNORM> - Format traits specialization for R8_SNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8_SNORM>
- : ComponentTraits<SWR_TYPE_SNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8_SINT> - Format traits specialization for R8_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8_SINT>
- : ComponentTraits<SWR_TYPE_SINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8_UINT> - Format traits specialization for R8_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<A8_UNORM> - Format traits specialization for A8_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<A8_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<3>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<I8_UNORM> - Format traits specialization for I8_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<I8_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L8_UNORM> - Format traits specialization for L8_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L8_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8_SSCALED> - Format traits specialization for R8_SSCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8_SSCALED>
- : ComponentTraits<SWR_TYPE_SSCALED, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8_USCALED> - Format traits specialization for R8_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8_USCALED>
- : ComponentTraits<SWR_TYPE_USCALED, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L8_UNORM_SRGB> - Format traits specialization for L8_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L8_UNORM_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{true};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L8_UINT> - Format traits specialization for L8_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L8_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<L8_SINT> - Format traits specialization for L8_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<L8_SINT>
- : ComponentTraits<SWR_TYPE_SINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<I8_UINT> - Format traits specialization for I8_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<I8_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<I8_SINT> - Format traits specialization for I8_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<I8_SINT>
- : ComponentTraits<SWR_TYPE_SINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<DXT1_RGB_SRGB> - Format traits specialization for DXT1_RGB_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<DXT1_RGB_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<YCRCB_SWAPUVY> - Format traits specialization for YCRCB_SWAPUVY
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<YCRCB_SWAPUVY>
- : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{true};
- static const uint32_t bcWidth{2};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<BC1_UNORM> - Format traits specialization for BC1_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<BC1_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<BC2_UNORM> - Format traits specialization for BC2_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<BC2_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<BC3_UNORM> - Format traits specialization for BC3_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<BC3_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<BC4_UNORM> - Format traits specialization for BC4_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<BC4_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<BC5_UNORM> - Format traits specialization for BC5_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<BC5_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<BC1_UNORM_SRGB> - Format traits specialization for BC1_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<BC1_UNORM_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{true};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<BC2_UNORM_SRGB> - Format traits specialization for BC2_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<BC2_UNORM_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{true};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<BC3_UNORM_SRGB> - Format traits specialization for BC3_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<BC3_UNORM_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{true};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<YCRCB_SWAPUV> - Format traits specialization for YCRCB_SWAPUV
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<YCRCB_SWAPUV>
- : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{true};
- static const uint32_t bcWidth{2};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8_8 TransposeT;
- typedef Format4<8, 8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<DXT1_RGB> - Format traits specialization for DXT1_RGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<DXT1_RGB>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8_UNORM> - Format traits specialization for R8G8B8_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{24};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8 TransposeT;
- typedef Format3<8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8_SNORM> - Format traits specialization for R8G8B8_SNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8_SNORM>
- : ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{24};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8 TransposeT;
- typedef Format3<8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8_SSCALED> - Format traits specialization for R8G8B8_SSCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8_SSCALED>
- : ComponentTraits<SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{24};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8 TransposeT;
- typedef Format3<8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8_USCALED> - Format traits specialization for R8G8B8_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8_USCALED>
- : ComponentTraits<SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{24};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8 TransposeT;
- typedef Format3<8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R64G64B64A64_FLOAT> - Format traits specialization for R64G64B64A64_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R64G64B64A64_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
- 64,
- SWR_TYPE_FLOAT,
- 64,
- SWR_TYPE_FLOAT,
- 64,
- SWR_TYPE_FLOAT,
- 64>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{256};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose64_64_64_64 TransposeT;
- typedef Format4<64, 64, 64, 64> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R64G64B64_FLOAT> - Format traits specialization for R64G64B64_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R64G64B64_FLOAT>
- : ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{192};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose64_64_64 TransposeT;
- typedef Format3<64, 64, 64> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<BC4_SNORM> - Format traits specialization for BC4_SNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<BC4_SNORM>
- : ComponentTraits<SWR_TYPE_SNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{64};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<BC5_SNORM> - Format traits specialization for BC5_SNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<BC5_SNORM>
- : ComponentTraits<SWR_TYPE_SNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16_FLOAT> - Format traits specialization for R16G16B16_FLOAT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16_FLOAT>
- : ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{48};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16 TransposeT;
- typedef Format3<16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16_UNORM> - Format traits specialization for R16G16B16_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{48};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16 TransposeT;
- typedef Format3<16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16_SNORM> - Format traits specialization for R16G16B16_SNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16_SNORM>
- : ComponentTraits<SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{48};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16 TransposeT;
- typedef Format3<16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16_SSCALED> - Format traits specialization for R16G16B16_SSCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16_SSCALED>
- : ComponentTraits<SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{48};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16 TransposeT;
- typedef Format3<16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16_USCALED> - Format traits specialization for R16G16B16_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16_USCALED>
- : ComponentTraits<SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{48};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16 TransposeT;
- typedef Format3<16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<BC6H_SF16> - Format traits specialization for BC6H_SF16
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<BC6H_SF16>
- : ComponentTraits<SWR_TYPE_SNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<BC7_UNORM> - Format traits specialization for BC7_UNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<BC7_UNORM>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<BC7_UNORM_SRGB> - Format traits specialization for BC7_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<BC7_UNORM_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{true};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<BC6H_UF16> - Format traits specialization for BC6H_UF16
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<BC6H_UF16>
- : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{128};
- static const uint32_t numComps{1};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{true};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{4};
- static const uint32_t bcHeight{4};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8_UNORM_SRGB> - Format traits specialization for R8G8B8_UNORM_SRGB
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8_UNORM_SRGB>
- : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{24};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{true};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8 TransposeT;
- typedef Format3<8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16_UINT> - Format traits specialization for R16G16B16_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{48};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16 TransposeT;
- typedef Format3<16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R16G16B16_SINT> - Format traits specialization for R16G16B16_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R16G16B16_SINT>
- : ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{48};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose16_16_16 TransposeT;
- typedef Format3<16, 16, 16> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32_SFIXED> - Format traits specialization for R32_SFIXED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R32_SFIXED>
- : ComponentTraits<SWR_TYPE_SFIXED, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<32> TransposeT;
- typedef Format1<32> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R10G10B10A2_SNORM> - Format traits specialization for R10G10B10A2_SNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R10G10B10A2_SNORM> : ComponentTraits<SWR_TYPE_SNORM,
- 10,
- SWR_TYPE_SNORM,
- 10,
- SWR_TYPE_SNORM,
- 10,
- SWR_TYPE_SNORM,
- 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R10G10B10A2_USCALED> - Format traits specialization for R10G10B10A2_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R10G10B10A2_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
- 10,
- SWR_TYPE_USCALED,
- 10,
- SWR_TYPE_USCALED,
- 10,
- SWR_TYPE_USCALED,
- 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R10G10B10A2_SSCALED> - Format traits specialization for R10G10B10A2_SSCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R10G10B10A2_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
- 10,
- SWR_TYPE_SSCALED,
- 10,
- SWR_TYPE_SSCALED,
- 10,
- SWR_TYPE_SSCALED,
- 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R10G10B10A2_SINT> - Format traits specialization for R10G10B10A2_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R10G10B10A2_SINT>
- : ComponentTraits<SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B10G10R10A2_SNORM> - Format traits specialization for B10G10R10A2_SNORM
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B10G10R10A2_SNORM> : ComponentTraits<SWR_TYPE_SNORM,
- 10,
- SWR_TYPE_SNORM,
- 10,
- SWR_TYPE_SNORM,
- 10,
- SWR_TYPE_SNORM,
- 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B10G10R10A2_USCALED> - Format traits specialization for B10G10R10A2_USCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B10G10R10A2_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
- 10,
- SWR_TYPE_USCALED,
- 10,
- SWR_TYPE_USCALED,
- 10,
- SWR_TYPE_USCALED,
- 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B10G10R10A2_SSCALED> - Format traits specialization for B10G10R10A2_SSCALED
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B10G10R10A2_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
- 10,
- SWR_TYPE_SSCALED,
- 10,
- SWR_TYPE_SSCALED,
- 10,
- SWR_TYPE_SSCALED,
- 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B10G10R10A2_UINT> - Format traits specialization for B10G10R10A2_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B10G10R10A2_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<B10G10R10A2_SINT> - Format traits specialization for B10G10R10A2_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<B10G10R10A2_SINT>
- : ComponentTraits<SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{32};
- static const uint32_t numComps{4};
- static const bool hasAlpha{true};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose10_10_10_2 TransposeT;
- typedef Format4<10, 10, 10, 2> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8_UINT> - Format traits specialization for R8G8B8_UINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8_UINT>
- : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{24};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8 TransposeT;
- typedef Format3<8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R8G8B8_SINT> - Format traits specialization for R8G8B8_SINT
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<R8G8B8_SINT>
- : ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{24};
- static const uint32_t numComps{3};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{0};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef Transpose8_8_8 TransposeT;
- typedef Format3<8, 8, 8> FormatT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<RAW> - Format traits specialization for RAW
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct FormatTraits<RAW>
- : ComponentTraits<SWR_TYPE_UINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{8};
- static const uint32_t numComps{1};
- static const bool hasAlpha{false};
- static const uint32_t alphaComp{3};
- static const bool isSRGB{false};
- static const bool isBC{false};
- static const bool isSubsampled{false};
- static const uint32_t bcWidth{1};
- static const uint32_t bcHeight{1};
-
- typedef TransposeSingleComponent<8> TransposeT;
- typedef Format1<8> FormatT;
-};
diff --git a/src/gallium/drivers/swr/rasterizer/core/format_types.h b/src/gallium/drivers/swr/rasterizer/core/format_types.h
deleted file mode 100644
index 7d7dd843349..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/format_types.h
+++ /dev/null
@@ -1,1629 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file formats.h
- *
- * @brief Definitions for SWR_FORMAT functions.
- *
- ******************************************************************************/
-#pragma once
-
-#include "utils.h"
-#include "common/simdintrin.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// PackTraits - Helpers for packing / unpacking same pixel sizes
-//////////////////////////////////////////////////////////////////////////
-template <uint32_t NumBits, bool Signed = false>
-struct PackTraits
-{
- static const uint32_t MyNumBits = NumBits;
-
- static simdscalar loadSOA(const uint8_t* pSrc) = delete;
- static void storeSOA(uint8_t* pDst, simdscalar const& src) = delete;
- static simdscalar unpack(simdscalar& in) = delete;
- static simdscalar pack(simdscalar& in) = delete;
-
- static simd16scalar loadSOA_16(const uint8_t* pSrc) = delete;
- static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src) = delete;
- static simd16scalar unpack(simd16scalar& in) = delete;
- static simd16scalar pack(simd16scalar& in) = delete;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// PackTraits - Helpers for packing / unpacking unused channels
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct PackTraits<0, false>
-{
- static const uint32_t MyNumBits = 0;
-
- static simdscalar loadSOA(const uint8_t* pSrc) { return _simd_setzero_ps(); }
- static void storeSOA(uint8_t* pDst, simdscalar const& src) { return; }
- static simdscalar unpack(simdscalar& in) { return _simd_setzero_ps(); }
- static simdscalar pack(simdscalar& in) { return _simd_setzero_ps(); }
-
- static simd16scalar loadSOA_16(const uint8_t* pSrc) { return _simd16_setzero_ps(); }
- static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src) { return; }
- static simd16scalar unpack(simd16scalar& in) { return _simd16_setzero_ps(); }
- static simd16scalar pack(simd16scalar& in) { return _simd16_setzero_ps(); }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// PackTraits - Helpers for packing / unpacking 8 bit unsigned channels
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct PackTraits<8, false>
-{
- static const uint32_t MyNumBits = 8;
-
- static simdscalar loadSOA(const uint8_t* pSrc)
- {
-#if KNOB_SIMD_WIDTH == 8
- __m256 result = _mm256_setzero_ps();
- __m128 vLo = _mm_castpd_ps(_mm_load_sd((double*)pSrc));
- return _mm256_insertf128_ps(result, vLo, 0);
-#else
-#error Unsupported vector width
-#endif
- }
-
- static void storeSOA(uint8_t* pDst, simdscalar const& src)
- {
- // store simd bytes
-#if KNOB_SIMD_WIDTH == 8
- _mm_storel_pd((double*)pDst, _mm_castps_pd(_mm256_castps256_ps128(src)));
-#else
-#error Unsupported vector width
-#endif
- }
-
- static simdscalar unpack(simdscalar& in)
- {
-#if KNOB_SIMD_WIDTH == 8
-#if KNOB_ARCH <= KNOB_ARCH_AVX
- __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
- __m128i resLo = _mm_cvtepu8_epi32(src);
- __m128i resHi =
- _mm_shuffle_epi8(src, _mm_set_epi32(0x80808007, 0x80808006, 0x80808005, 0x80808004));
-
- __m256i result = _mm256_castsi128_si256(resLo);
- result = _mm256_insertf128_si256(result, resHi, 1);
- return simdscalar{_mm256_castsi256_ps(result)};
-#else
- return _mm256_castsi256_ps(
- _mm256_cvtepu8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
-#endif
-#else
-#error Unsupported vector width
-#endif
- }
-
- static simdscalar pack(simdscalar& in)
- {
-#if KNOB_SIMD_WIDTH == 8
- simdscalari src = _simd_castps_si(in);
- __m128i res16 =
- _mm_packus_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1));
- __m128i res8 = _mm_packus_epi16(res16, _mm_undefined_si128());
- return _mm256_castsi256_ps(_mm256_castsi128_si256(res8));
-#else
-#error Unsupported vector width
-#endif
- }
-
- static simd16scalar loadSOA_16(const uint8_t* pSrc)
- {
- simd16scalar result = _simd16_setzero_ps();
- simdscalar resultlo = _simd_setzero_ps();
-
- const __m128 src = _mm_load_ps(reinterpret_cast<const float*>(pSrc));
-
- resultlo = _mm256_insertf128_ps(resultlo, src, 0);
- result = _simd16_insert_ps(result, resultlo, 0);
-
- return result;
- }
-
- static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
- {
- // store simd16 bytes
- _mm_store_ps(reinterpret_cast<float*>(pDst),
- _mm256_castps256_ps128(_simd16_extract_ps(src, 0)));
- }
-
- static simd16scalar unpack(simd16scalar& in)
- {
- simd4scalari tmp = _mm_castps_si128(_mm256_castps256_ps128(_simd16_extract_ps(in, 0)));
- simd16scalari result = _simd16_cvtepu8_epi32(tmp);
-
- return _simd16_castsi_ps(result);
- }
-
- static simd16scalar pack(simd16scalar& in)
- {
- // clang-format off
-
- simd16scalari result = _simd16_setzero_si();
-
- simdscalari inlo = _simd_castps_si(_simd16_extract_ps(in, 0)); // r0 r1 r2 r3 r4 r5 r6 r7 (32b)
- simdscalari inhi = _simd_castps_si(_simd16_extract_ps(in, 1)); // r8 r9 rA rB rC rD rE rF
-
- simdscalari permlo = _simd_permute2f128_si(inlo, inhi, 0x20); // r0 r1 r2 r3 r8 r9 rA rB (32b)
- simdscalari permhi = _simd_permute2f128_si(inlo, inhi, 0x31); // r4 r5 r6 r7 rC rD rE rF (32b)
-
- simdscalari pack = _simd_packus_epi32(permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF (16b)
-
- const simdscalari zero = _simd_setzero_si();
-
- permlo = _simd_permute2f128_si(pack, zero, 0x20); // (2, 0) // r0 r1 r2 r3 r4 r5 r6 r7 00 00 00 00 00 00 00 00 (16b)
- permhi = _simd_permute2f128_si(pack, zero, 0x31); // (3, 1) // r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 (16b)
-
- pack = _simd_packus_epi16(permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (8b)
-
- result = _simd16_insert_si(result, pack, 0);
-
- return _simd16_castsi_ps(result);
-
- // clang-format on
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// PackTraits - Helpers for packing / unpacking 8 bit signed channels
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct PackTraits<8, true>
-{
- static const uint32_t MyNumBits = 8;
-
- static simdscalar loadSOA(const uint8_t* pSrc)
- {
-#if KNOB_SIMD_WIDTH == 8
- __m256 result = _mm256_setzero_ps();
- __m128 vLo = _mm_castpd_ps(_mm_load_sd((double*)pSrc));
- return _mm256_insertf128_ps(result, vLo, 0);
-#else
-#error Unsupported vector width
-#endif
- }
-
- static void storeSOA(uint8_t* pDst, simdscalar const& src)
- {
- // store simd bytes
-#if KNOB_SIMD_WIDTH == 8
- _mm_storel_pd((double*)pDst, _mm_castps_pd(_mm256_castps256_ps128(src)));
-#else
-#error Unsupported vector width
-#endif
- }
-
- static simdscalar unpack(simdscalar& in)
- {
-#if KNOB_SIMD_WIDTH == 8
-#if KNOB_ARCH <= KNOB_ARCH_AVX
- SWR_INVALID("I think this may be incorrect.");
- __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
- __m128i resLo = _mm_cvtepi8_epi32(src);
- __m128i resHi =
- _mm_shuffle_epi8(src, _mm_set_epi32(0x80808007, 0x80808006, 0x80808005, 0x80808004));
-
- __m256i result = _mm256_castsi128_si256(resLo);
- result = _mm256_insertf128_si256(result, resHi, 1);
- return _mm256_castsi256_ps(result);
-#else
- return _mm256_castsi256_ps(
- _mm256_cvtepi8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
-#endif
-#else
-#error Unsupported vector width
-#endif
- }
-
- static simdscalar pack(simdscalar& in)
- {
-#if KNOB_SIMD_WIDTH == 8
- simdscalari src = _simd_castps_si(in);
- __m128i res16 =
- _mm_packs_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1));
- __m128i res8 = _mm_packs_epi16(res16, _mm_undefined_si128());
- return _mm256_castsi256_ps(_mm256_castsi128_si256(res8));
-#else
-#error Unsupported vector width
-#endif
- }
-
- static simd16scalar loadSOA_16(const uint8_t* pSrc)
- {
- simd16scalar result = _simd16_setzero_ps();
- simdscalar resultlo = _simd_setzero_ps();
-
- const __m128 src = _mm_load_ps(reinterpret_cast<const float*>(pSrc));
-
- resultlo = _mm256_insertf128_ps(resultlo, src, 0);
- result = _simd16_insert_ps(result, resultlo, 0);
-
- return result;
- }
-
- static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
- {
- // store simd16 bytes
- _mm_store_ps(reinterpret_cast<float*>(pDst),
- _mm256_castps256_ps128(_simd16_extract_ps(src, 0)));
- }
-
- static simd16scalar unpack(simd16scalar& in)
- {
- simd4scalari tmp = _mm_castps_si128(_mm256_castps256_ps128(_simd16_extract_ps(in, 0)));
- simd16scalari result = _simd16_cvtepu8_epi32(tmp);
-
- return _simd16_castsi_ps(result);
- }
-
- static simd16scalar pack(simd16scalar& in)
- {
- // clang-format off
-
- simd16scalari result = _simd16_setzero_si();
-
- simdscalari inlo = _simd_castps_si(_simd16_extract_ps(in, 0)); // r0 r1 r2 r3 r4 r5 r6 r7 (32b)
- simdscalari inhi = _simd_castps_si(_simd16_extract_ps(in, 1)); // r8 r9 rA rB rC rD rE rF
-
- simdscalari permlo = _simd_permute2f128_si(inlo, inhi, 0x20); // r0 r1 r2 r3 r8 r9 rA rB (32b)
- simdscalari permhi = _simd_permute2f128_si(inlo, inhi, 0x31); // r4 r5 r6 r7 rC rD rE rF (32b)
-
- simdscalari pack = _simd_packs_epi32(permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF (16b)
-
- const simdscalari zero = _simd_setzero_si();
-
- permlo = _simd_permute2f128_si(pack, zero, 0x20); // (2, 0) // r0 r1 r2 r3 r4 r5 r6 r7 00 00 00 00 00 00 00 00 (16b)
- permhi = _simd_permute2f128_si(pack, zero, 0x31); // (3, 1) // r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 (16b)
-
- pack = _simd_packs_epi16(permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (8b)
-
- result = _simd16_insert_si(result, pack, 0);
-
- return _simd16_castsi_ps(result);
-
- // clang-format on
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// PackTraits - Helpers for packing / unpacking 16 bit unsigned channels
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct PackTraits<16, false>
-{
- static const uint32_t MyNumBits = 16;
-
- static simdscalar loadSOA(const uint8_t* pSrc)
- {
-#if KNOB_SIMD_WIDTH == 8
- __m256 result = _mm256_setzero_ps();
- __m128 vLo = _mm_load_ps((const float*)pSrc);
- return _mm256_insertf128_ps(result, vLo, 0);
-#else
-#error Unsupported vector width
-#endif
- }
-
- static void storeSOA(uint8_t* pDst, simdscalar const& src)
- {
-#if KNOB_SIMD_WIDTH == 8
- // store 16B (2B * 8)
- _mm_store_ps((float*)pDst, _mm256_castps256_ps128(src));
-#else
-#error Unsupported vector width
-#endif
- }
-
- static simdscalar unpack(simdscalar& in)
- {
-#if KNOB_SIMD_WIDTH == 8
-#if KNOB_ARCH <= KNOB_ARCH_AVX
- __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
- __m128i resLo = _mm_cvtepu16_epi32(src);
- __m128i resHi =
- _mm_shuffle_epi8(src, _mm_set_epi32(0x80800F0E, 0x80800D0C, 0x80800B0A, 0x80800908));
-
- __m256i result = _mm256_castsi128_si256(resLo);
- result = _mm256_insertf128_si256(result, resHi, 1);
- return _mm256_castsi256_ps(result);
-#else
- return _mm256_castsi256_ps(
- _mm256_cvtepu16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
-#endif
-#else
-#error Unsupported vector width
-#endif
- }
-
- static simdscalar pack(simdscalar& in)
- {
-#if KNOB_SIMD_WIDTH == 8
- simdscalari src = _simd_castps_si(in);
- __m256i res = _mm256_castsi128_si256(
- _mm_packus_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1)));
- return _mm256_castsi256_ps(res);
-#else
-#error Unsupported vector width
-#endif
- }
-
- static simd16scalar loadSOA_16(const uint8_t* pSrc)
- {
- simd16scalar result = _simd16_setzero_ps();
-
- simdscalar resultlo = _simd_load_ps(reinterpret_cast<const float*>(pSrc));
-
- result = _simd16_insert_ps(result, resultlo, 0);
-
- return result;
- }
-
- static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
- {
- _simd_store_ps(reinterpret_cast<float*>(pDst), _simd16_extract_ps(src, 0));
- }
-
- static simd16scalar unpack(simd16scalar& in)
- {
- simd16scalari result = _simd16_cvtepu16_epi32(_simd_castps_si(_simd16_extract_ps(in, 0)));
-
- return _simd16_castsi_ps(result);
- }
-
- static simd16scalar pack(simd16scalar& in)
- {
- // clang-format off
-
- const simd16scalari zero = _simd16_setzero_si();
-
- simd16scalari permlo = _simd16_permute2f128_si(_simd16_castps_si(in), zero, 0x08); // (0, 0, 2, 0) // r0 r1 r2 r3 r8 r9 rA rB 00 00 00 00 00 00 00 00 (32b)
- simd16scalari permhi = _simd16_permute2f128_si(_simd16_castps_si(in), zero, 0x0D); // (0, 0, 3, 1) // r4 r5 r6 r7 rC rD rE rF 00 00 00 00 00 00 00 00
-
- simd16scalari result = _simd16_packus_epi32(permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (16b)
-
- return _simd16_castsi_ps(result);
-
- // clang-format on
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// PackTraits - Helpers for packing / unpacking 16 bit signed channels
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct PackTraits<16, true>
-{
- static const uint32_t MyNumBits = 16;
-
- static simdscalar loadSOA(const uint8_t* pSrc)
- {
-#if KNOB_SIMD_WIDTH == 8
- __m256 result = _mm256_setzero_ps();
- __m128 vLo = _mm_load_ps((const float*)pSrc);
- return _mm256_insertf128_ps(result, vLo, 0);
-#else
-#error Unsupported vector width
-#endif
- }
-
- static void storeSOA(uint8_t* pDst, simdscalar const& src)
- {
-#if KNOB_SIMD_WIDTH == 8
- // store 16B (2B * 8)
- _mm_store_ps((float*)pDst, _mm256_castps256_ps128(src));
-#else
-#error Unsupported vector width
-#endif
- }
-
- static simdscalar unpack(simdscalar& in)
- {
-#if KNOB_SIMD_WIDTH == 8
-#if KNOB_ARCH <= KNOB_ARCH_AVX
- SWR_INVALID("I think this may be incorrect.");
- __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
- __m128i resLo = _mm_cvtepi16_epi32(src);
- __m128i resHi =
- _mm_shuffle_epi8(src, _mm_set_epi32(0x80800F0E, 0x80800D0C, 0x80800B0A, 0x80800908));
-
- __m256i result = _mm256_castsi128_si256(resLo);
- result = _mm256_insertf128_si256(result, resHi, 1);
- return _mm256_castsi256_ps(result);
-#else
- return _mm256_castsi256_ps(
- _mm256_cvtepi16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
-#endif
-#else
-#error Unsupported vector width
-#endif
- }
-
- static simdscalar pack(simdscalar& in)
- {
-#if KNOB_SIMD_WIDTH == 8
- simdscalari src = _simd_castps_si(in);
- __m256i res = _mm256_castsi128_si256(
- _mm_packs_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1)));
- return _mm256_castsi256_ps(res);
-#else
-#error Unsupported vector width
-#endif
- }
-
- static simd16scalar loadSOA_16(const uint8_t* pSrc)
- {
- simd16scalar result = _simd16_setzero_ps();
-
- simdscalar resultlo = _simd_load_ps(reinterpret_cast<const float*>(pSrc));
-
- result = _simd16_insert_ps(result, resultlo, 0);
-
- return result;
- }
-
- static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
- {
- _simd_store_ps(reinterpret_cast<float*>(pDst), _simd16_extract_ps(src, 0));
- }
-
- static simd16scalar unpack(simd16scalar& in)
- {
- simd16scalari result = _simd16_cvtepu16_epi32(_simd_castps_si(_simd16_extract_ps(in, 0)));
-
- return _simd16_castsi_ps(result);
- }
-
- static simd16scalar pack(simd16scalar& in)
- {
- // clang-format off
-
- const simd16scalari zero = _simd16_setzero_si();
-
- simd16scalari permlo = _simd16_permute2f128_si(_simd16_castps_si(in), zero, 0x08); // (0, 0, 2, 0) // r0 r1 r2 r3 r8 r9 rA rB 00 00 00 00 00 00 00 00 (32b)
- simd16scalari permhi = _simd16_permute2f128_si(_simd16_castps_si(in), zero, 0x0D); // (0, 0, 3, 1) // r4 r5 r6 r7 rC rD rE rF 00 00 00 00 00 00 00 00
-
- simd16scalari result = _simd16_packs_epi32(permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (16b)
-
- return _simd16_castsi_ps(result);
-
- // clang-format on
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// PackTraits - Helpers for packing / unpacking 32 bit channels
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct PackTraits<32, false>
-{
- static const uint32_t MyNumBits = 32;
-
- static simdscalar loadSOA(const uint8_t* pSrc) { return _simd_load_ps((const float*)pSrc); }
- static void storeSOA(uint8_t* pDst, simdscalar const& src)
- {
- _simd_store_ps((float*)pDst, src);
- }
- static simdscalar unpack(simdscalar& in) { return in; }
- static simdscalar pack(simdscalar& in) { return in; }
-
- static simd16scalar loadSOA_16(const uint8_t* pSrc)
- {
- return _simd16_load_ps(reinterpret_cast<const float*>(pSrc));
- }
-
- static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
- {
- _simd16_store_ps(reinterpret_cast<float*>(pDst), src);
- }
-
- static simd16scalar unpack(simd16scalar& in) { return in; }
-
- static simd16scalar pack(simd16scalar& in) { return in; }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits.
-//////////////////////////////////////////////////////////////////////////
-template <SWR_TYPE type, uint32_t NumBits>
-struct TypeTraits : PackTraits<NumBits>
-{
- static const SWR_TYPE MyType = type;
- static float toFloat() { return 0.0; }
- static float fromFloat()
- {
- SWR_NOT_IMPL;
- return 0.0;
- }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for UINT8
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_UINT, 8> : PackTraits<8>
-{
- static const SWR_TYPE MyType = SWR_TYPE_UINT;
- static float toFloat() { return 0.0; }
- static float fromFloat()
- {
- SWR_NOT_IMPL;
- return 0.0;
- }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for UINT8
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_SINT, 8> : PackTraits<8, true>
-{
- static const SWR_TYPE MyType = SWR_TYPE_SINT;
- static float toFloat() { return 0.0; }
- static float fromFloat()
- {
- SWR_NOT_IMPL;
- return 0.0;
- }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for UINT16
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_UINT, 16> : PackTraits<16>
-{
- static const SWR_TYPE MyType = SWR_TYPE_UINT;
- static float toFloat() { return 0.0; }
- static float fromFloat()
- {
- SWR_NOT_IMPL;
- return 0.0;
- }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for SINT16
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_SINT, 16> : PackTraits<16, true>
-{
- static const SWR_TYPE MyType = SWR_TYPE_SINT;
- static float toFloat() { return 0.0; }
- static float fromFloat()
- {
- SWR_NOT_IMPL;
- return 0.0;
- }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for UINT32
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_UINT, 32> : PackTraits<32>
-{
- static const SWR_TYPE MyType = SWR_TYPE_UINT;
- static float toFloat() { return 0.0; }
- static float fromFloat()
- {
- SWR_NOT_IMPL;
- return 0.0;
- }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for UINT32
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_SINT, 32> : PackTraits<32>
-{
- static const SWR_TYPE MyType = SWR_TYPE_SINT;
- static float toFloat() { return 0.0; }
- static float fromFloat()
- {
- SWR_NOT_IMPL;
- return 0.0;
- }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for UNORM5
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_UNORM, 5> : PackTraits<5>
-{
- static const SWR_TYPE MyType = SWR_TYPE_UNORM;
- static float toFloat() { return 1.0f / 31.0f; }
- static float fromFloat() { return 31.0f; }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for UNORM6
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_UNORM, 6> : PackTraits<6>
-{
- static const SWR_TYPE MyType = SWR_TYPE_UNORM;
- static float toFloat() { return 1.0f / 63.0f; }
- static float fromFloat() { return 63.0f; }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for UNORM8
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_UNORM, 8> : PackTraits<8>
-{
- static const SWR_TYPE MyType = SWR_TYPE_UNORM;
- static float toFloat() { return 1.0f / 255.0f; }
- static float fromFloat() { return 255.0f; }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for UNORM8
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_SNORM, 8> : PackTraits<8, true>
-{
- static const SWR_TYPE MyType = SWR_TYPE_SNORM;
- static float toFloat() { return 1.0f / 127.0f; }
- static float fromFloat() { return 127.0f; }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for UNORM16
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_UNORM, 16> : PackTraits<16>
-{
- static const SWR_TYPE MyType = SWR_TYPE_UNORM;
- static float toFloat() { return 1.0f / 65535.0f; }
- static float fromFloat() { return 65535.0f; }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for SNORM16
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_SNORM, 16> : PackTraits<16, true>
-{
- static const SWR_TYPE MyType = SWR_TYPE_UNORM;
- static float toFloat() { return 1.0f / 32767.0f; }
- static float fromFloat() { return 32767.0f; }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for UNORM24
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_UNORM, 24> : PackTraits<32>
-{
- static const SWR_TYPE MyType = SWR_TYPE_UNORM;
- static float toFloat() { return 1.0f / 16777215.0f; }
- static float fromFloat() { return 16777215.0f; }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-// FLOAT Specializations from here on...
-//////////////////////////////////////////////////////////////////////////
-#define TO_M128i(a) _mm_castps_si128(a)
-#define TO_M128(a) _mm_castsi128_ps(a)
-
-#include "math.h"
-
-template <unsigned expnum, unsigned expden, unsigned coeffnum, unsigned coeffden>
-inline static __m128 fastpow(__m128 arg)
-{
- __m128 ret = arg;
-
- static const __m128 factor =
- _mm_set1_ps(exp2(127.0f * expden / expnum - 127.0f) *
- powf(1.0f * coeffnum / coeffden, 1.0f * expden / expnum));
-
- // Apply a constant pre-correction factor.
- ret = _mm_mul_ps(ret, factor);
-
- // Reinterpret arg as integer to obtain logarithm.
- // asm("cvtdq2ps %1, %0" : "=x" (ret) : "x" (ret));
- ret = _mm_cvtepi32_ps(_mm_castps_si128(ret));
-
- // Multiply logarithm by power.
- ret = _mm_mul_ps(ret, _mm_set1_ps(1.0f * expnum / expden));
-
- // Convert back to "integer" to exponentiate.
- // asm("cvtps2dq %1, %0" : "=x" (ret) : "x" (ret));
- ret = _mm_castsi128_ps(_mm_cvtps_epi32(ret));
-
- return ret;
-}
-
-inline static __m128 pow512_4(__m128 arg)
-{
- // 5/12 is too small, so compute the 4th root of 20/12 instead.
- // 20/12 = 5/3 = 1 + 2/3 = 2 - 1/3. 2/3 is a suitable argument for fastpow.
- // weighting coefficient: a^-1/2 = 2 a; a = 2^-2/3
- __m128 xf = fastpow<2, 3, int(0.629960524947437 * 1e9), int(1e9)>(arg);
- __m128 xover = _mm_mul_ps(arg, xf);
-
- __m128 xfm1 = _mm_rsqrt_ps(xf);
- __m128 x2 = _mm_mul_ps(arg, arg);
- __m128 xunder = _mm_mul_ps(x2, xfm1);
-
- // sqrt2 * over + 2 * sqrt2 * under
- __m128 xavg = _mm_mul_ps(_mm_set1_ps(1.0f / (3.0f * 0.629960524947437f) * 0.999852f),
- _mm_add_ps(xover, xunder));
-
- xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg));
- xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg));
- return xavg;
-}
-
-inline static __m128 powf_wrapper(__m128 Base, float Exp)
-{
- float* f = (float*)(&Base);
-
- return _mm_set_ps(powf(f[3], Exp), powf(f[2], Exp), powf(f[1], Exp), powf(f[0], Exp));
-}
-
-static inline __m128 ConvertFloatToSRGB2(__m128& Src)
-{
- // create a mask with 0xFFFFFFFF in the DWORDs where the source is <= the minimal SRGB float
- // value
- __m128i CmpToSRGBThresholdMask = TO_M128i(_mm_cmpnlt_ps(_mm_set1_ps(0.0031308f), Src));
-
- // squeeze the mask down to 16 bits (4 bits per DWORD)
- int CompareResult = _mm_movemask_epi8(CmpToSRGBThresholdMask);
-
- __m128 Result;
-
- //
- if (CompareResult == 0xFFFF)
- {
- // all DWORDs are <= the threshold
- Result = _mm_mul_ps(Src, _mm_set1_ps(12.92f));
- }
- else if (CompareResult == 0x0)
- {
- // all DWORDs are > the threshold
- __m128 fSrc_0RGB = Src;
-
- // --> 1.055f * c(1.0f/2.4f) - 0.055f
-#if KNOB_USE_FAST_SRGB == TRUE
- // 1.0f / 2.4f is 5.0f / 12.0f which is used for approximation.
- __m128 f = pow512_4(fSrc_0RGB);
-#else
- __m128 f = powf_wrapper(fSrc_0RGB, 1.0f / 2.4f);
-#endif
- f = _mm_mul_ps(f, _mm_set1_ps(1.055f));
- Result = _mm_sub_ps(f, _mm_set1_ps(0.055f));
- }
- else
- {
- // some DWORDs are <= the threshold and some are > threshold
- __m128 Src_0RGB_mul_denorm = _mm_mul_ps(Src, _mm_set1_ps(12.92f));
-
- __m128 fSrc_0RGB = Src;
-
- // --> 1.055f * c(1.0f/2.4f) - 0.055f
-#if KNOB_USE_FAST_SRGB == TRUE
- // 1.0f / 2.4f is 5.0f / 12.0f which is used for approximation.
- __m128 f = pow512_4(fSrc_0RGB);
-#else
- __m128 f = powf_wrapper(fSrc_0RGB, 1.0f / 2.4f);
-#endif
- f = _mm_mul_ps(f, _mm_set1_ps(1.055f));
- f = _mm_sub_ps(f, _mm_set1_ps(0.055f));
-
- // Clear the alpha (is garbage after the sub)
- __m128i i = _mm_and_si128(TO_M128i(f),
- _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
-
- __m128i LessThanPart = _mm_and_si128(CmpToSRGBThresholdMask, TO_M128i(Src_0RGB_mul_denorm));
- __m128i GreaterEqualPart = _mm_andnot_si128(CmpToSRGBThresholdMask, i);
- __m128i CombinedParts = _mm_or_si128(LessThanPart, GreaterEqualPart);
-
- Result = TO_M128(CombinedParts);
- }
-
- return Result;
-}
-
-template <unsigned expnum, unsigned expden, unsigned coeffnum, unsigned coeffden>
-inline static simd16scalar SIMDCALL fastpow(simd16scalar const& value)
-{
- static const float factor1 = exp2(127.0f * expden / expnum - 127.0f) *
- powf(1.0f * coeffnum / coeffden, 1.0f * expden / expnum);
-
- // Apply a constant pre-correction factor.
- simd16scalar result = _simd16_mul_ps(value, _simd16_set1_ps(factor1));
-
- // Reinterpret arg as integer to obtain logarithm.
- // asm("cvtdq2ps %1, %0" : "=x" (result) : "x" (result));
- result = _simd16_cvtepi32_ps(_simd16_castps_si(result));
-
- // Multiply logarithm by power.
- result = _simd16_mul_ps(result, _simd16_set1_ps(1.0f * expnum / expden));
-
- // Convert back to "integer" to exponentiate.
- // asm("cvtps2dq %1, %0" : "=x" (result) : "x" (result));
- result = _simd16_castsi_ps(_simd16_cvtps_epi32(result));
-
- return result;
-}
-
-inline static simd16scalar SIMDCALL pow512_4(simd16scalar const& arg)
-{
- // 5/12 is too small, so compute the 4th root of 20/12 instead.
- // 20/12 = 5/3 = 1 + 2/3 = 2 - 1/3. 2/3 is a suitable argument for fastpow.
- // weighting coefficient: a^-1/2 = 2 a; a = 2^-2/3
- simd16scalar xf = fastpow<2, 3, int(0.629960524947437 * 1e9), int(1e9)>(arg);
- simd16scalar xover = _simd16_mul_ps(arg, xf);
-
- simd16scalar xfm1 = _simd16_rsqrt_ps(xf);
- simd16scalar x2 = _simd16_mul_ps(arg, arg);
- simd16scalar xunder = _simd16_mul_ps(x2, xfm1);
-
- // sqrt2 * over + 2 * sqrt2 * under
- simd16scalar xavg =
- _simd16_mul_ps(_simd16_set1_ps(1.0f / (3.0f * 0.629960524947437f) * 0.999852f),
- _simd16_add_ps(xover, xunder));
-
- xavg = _simd16_mul_ps(xavg, _simd16_rsqrt_ps(xavg));
- xavg = _simd16_mul_ps(xavg, _simd16_rsqrt_ps(xavg));
-
- return xavg;
-}
-
-inline static simd16scalar SIMDCALL powf_wrapper(const simd16scalar& base, float exp)
-{
- const float* f = reinterpret_cast<const float*>(&base);
-
- return _simd16_set_ps(powf(f[15], exp),
- powf(f[14], exp),
- powf(f[13], exp),
- powf(f[12], exp),
- powf(f[11], exp),
- powf(f[10], exp),
- powf(f[9], exp),
- powf(f[8], exp),
- powf(f[7], exp),
- powf(f[6], exp),
- powf(f[5], exp),
- powf(f[4], exp),
- powf(f[3], exp),
- powf(f[2], exp),
- powf(f[1], exp),
- powf(f[0], exp));
-}
-
-// float to SRGB conversion formula
-//
-// if (value < 0.0031308f)
-// value *= 12.92f;
-// else
-// value = 1.055f * pow(value, 1.0f / 2.4f) - 0.055f;
-//
-static inline simd16scalar ConvertFloatToSRGB2(const simd16scalar& value)
-{
- // create a mask where the source is < the minimal SRGB float value
- const simd16mask mask = _simd16_cmplt_ps_mask(value, _simd16_set1_ps(0.0031308f));
-
- // if all elements are < the threshold, result = value * 12.92
- simd16scalar result = _simd16_mul_ps(value, _simd16_set1_ps(12.92f));
-
- if (_simd16_mask2int(mask) != 0xFFFF)
- {
- // some elements are >= threshold, result = 1.055 * power(value, 1.0 / 2.4) - 0.055
-#if KNOB_USE_FAST_SRGB == TRUE
- // 1.0f / 2.4f is 5.0f / 12.0f which is used for approximation.
- simd16scalar result2 = pow512_4(value);
-#else
- simd16scalar result2 = powf_wrapper(value, 1.0f / 2.4f);
-#endif
-
- result2 = _simd16_mul_ps(result2, _simd16_set1_ps(1.055f));
- result2 = _simd16_sub_ps(result2, _simd16_set1_ps(0.055f));
-
-#if (KNOB_ARCH == KNOB_ARCH_AVX512)
- // only native AVX512 can directly use the computed mask for the blend operation
- result = _mm512_mask_blend_ps(mask, result2, result);
-#else
- result = _simd16_blendv_ps(
- result2, result, _simd16_cmplt_ps(value, _simd16_set1_ps(0.0031308f)));
-#endif
- }
-
- return result;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for FLOAT16
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_FLOAT, 16> : PackTraits<16>
-{
- static const SWR_TYPE MyType = SWR_TYPE_FLOAT;
- static float toFloat() { return 1.0f; }
- static float fromFloat() { return 1.0f; }
- static simdscalar convertSrgb(simdscalar& in)
- {
- SWR_NOT_IMPL;
- return _simd_setzero_ps();
- }
-
- static simdscalar pack(const simdscalar& in)
- {
-#if KNOB_SIMD_WIDTH == 8
-#if (KNOB_ARCH == KNOB_ARCH_AVX)
- // input is 8 packed float32, output is 8 packed float16
- simdscalari src = _simd_castps_si(in);
-
- static const uint32_t FLOAT_EXP_BITS = 8;
- static const uint32_t FLOAT_MANTISSA_BITS = 23;
- static const uint32_t FLOAT_MANTISSA_MASK = (1U << FLOAT_MANTISSA_BITS) - 1;
- static const uint32_t FLOAT_EXP_MASK = ((1U << FLOAT_EXP_BITS) - 1) << FLOAT_MANTISSA_BITS;
-
- static const uint32_t HALF_EXP_BITS = 5;
- static const uint32_t HALF_MANTISSA_BITS = 10;
- static const uint32_t HALF_EXP_MASK = ((1U << HALF_EXP_BITS) - 1) << HALF_MANTISSA_BITS;
-
- // minimum exponent required, exponents below this are flushed to 0.
- static const int32_t HALF_EXP_MIN = -14;
- static const int32_t FLOAT_EXP_BIAS = 127;
- static const int32_t FLOAT_EXP_MIN = HALF_EXP_MIN + FLOAT_EXP_BIAS;
- static const int32_t FLOAT_EXP_MIN_FTZ =
- FLOAT_EXP_MIN - (HALF_MANTISSA_BITS + 1); // +1 for the lack of implicit significand
-
- // maximum exponent required, exponents above this are set to infinity
- static const int32_t HALF_EXP_MAX = 15;
- static const int32_t FLOAT_EXP_MAX = HALF_EXP_MAX + FLOAT_EXP_BIAS;
-
- const simdscalari vSignMask = _simd_set1_epi32(0x80000000);
- const simdscalari vExpMask = _simd_set1_epi32(FLOAT_EXP_MASK);
- const simdscalari vManMask = _simd_set1_epi32(FLOAT_MANTISSA_MASK);
- const simdscalari vExpMin =
- _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MIN << FLOAT_MANTISSA_BITS));
- const simdscalari vExpMinFtz =
- _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MIN_FTZ << FLOAT_MANTISSA_BITS));
- const simdscalari vExpMax =
- _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MAX << FLOAT_MANTISSA_BITS));
-
- simdscalari vSign = _simd_and_si(src, vSignMask);
- simdscalari vExp = _simd_and_si(src, vExpMask);
- simdscalari vMan = _simd_and_si(src, vManMask);
-
- simdscalari vFTZMask = _simd_cmplt_epi32(vExp, vExpMinFtz);
- simdscalari vDenormMask = _simd_andnot_si(vFTZMask, _simd_cmplt_epi32(vExp, vExpMin));
- simdscalari vInfMask = _simd_cmpeq_epi32(vExpMask, vExp);
- simdscalari vClampMask = _simd_andnot_si(vInfMask, _simd_cmplt_epi32(vExpMax, vExp));
-
- simdscalari vHalfExp = _simd_add_epi32(_simd_sub_epi32(vExp, vExpMin),
- _simd_set1_epi32(1U << FLOAT_MANTISSA_BITS));
-
- // pack output 16-bits into the lower 16-bits of each 32-bit channel
- simdscalari vDst =
- _simd_and_si(_simd_srli_epi32(vHalfExp, 13), _simd_set1_epi32(HALF_EXP_MASK));
- vDst = _simd_or_si(vDst, _simd_srli_epi32(vMan, FLOAT_MANTISSA_BITS - HALF_MANTISSA_BITS));
-
- // Flush To Zero
- vDst = _simd_andnot_si(vFTZMask, vDst);
- // Apply Infinites / NaN
- vDst = _simd_or_si(vDst, _simd_and_si(vInfMask, _simd_set1_epi32(HALF_EXP_MASK)));
-
- // Apply clamps
- vDst = _simd_andnot_si(vClampMask, vDst);
- vDst = _simd_or_si(vDst, _simd_and_si(vClampMask, _simd_set1_epi32(0x7BFF)));
-
- // Compute Denormals (subnormals)
- if (!_mm256_testz_si256(vDenormMask, vDenormMask))
- {
- uint32_t* pDenormMask = (uint32_t*)&vDenormMask;
- uint32_t* pExp = (uint32_t*)&vExp;
- uint32_t* pMan = (uint32_t*)&vMan;
- uint32_t* pDst = (uint32_t*)&vDst;
- for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i)
- {
- if (pDenormMask[i])
- {
- // Need to compute subnormal value
- uint32_t exponent = pExp[i] >> FLOAT_MANTISSA_BITS;
- uint32_t mantissa =
- pMan[i] | (1U << FLOAT_MANTISSA_BITS); // Denorms include no "implicit" 1s.
- // Make it explicit
-
- pDst[i] = mantissa >> ((FLOAT_EXP_MIN - exponent) +
- (FLOAT_MANTISSA_BITS - HALF_MANTISSA_BITS));
- }
- }
- }
-
- // Add in sign bits
- vDst = _simd_or_si(vDst, _simd_srli_epi32(vSign, 16));
-
- // Pack to lower 128-bits
- vDst = _mm256_castsi128_si256(
- _mm_packus_epi32(_mm256_castsi256_si128(vDst), _mm256_extractf128_si256(vDst, 1)));
-
-#if 0
-#if !defined(NDEBUG)
- simdscalari vCheck = _mm256_castsi128_si256(_mm256_cvtps_ph(in, _MM_FROUND_TRUNC));
-
- for (uint32_t i = 0; i < 4; ++i)
- {
- SWR_ASSERT(vCheck.m256i_i32[i] == vDst.m256i_i32[i]);
- }
-#endif
-#endif
-
- return _simd_castsi_ps(vDst);
-
-#else
- return _mm256_castsi256_ps(_mm256_castsi128_si256(_mm256_cvtps_ph(in, _MM_FROUND_TRUNC)));
-#endif
-#else
-#error Unsupported vector width
-#endif
- }
-
- static simdscalar unpack(const simdscalar& in)
- {
- // input is 8 packed float16, output is 8 packed float32
- SWR_NOT_IMPL; // @todo
- return _simd_setzero_ps();
- }
-
- static simd16scalar pack(const simd16scalar& in)
- {
- simd16scalari result = _simd16_setzero_si();
- simdscalari resultlo = _simd_setzero_si();
-
-#if (KNOB_ARCH == KNOB_ARCH_AVX)
- simdscalar simdlo = pack(_simd16_extract_ps(in, 0));
- simdscalar simdhi = pack(_simd16_extract_ps(in, 1));
-
- __m128i templo = _simd_extractf128_si(_simd_castps_si(simdlo), 0);
- __m128i temphi = _simd_extractf128_si(_simd_castps_si(simdhi), 0);
-
-#else
- __m128i templo = _mm256_cvtps_ph(_simd16_extract_ps(in, 0), _MM_FROUND_TRUNC);
- __m128i temphi = _mm256_cvtps_ph(_simd16_extract_ps(in, 1), _MM_FROUND_TRUNC);
-
-#endif
- resultlo = _simd_insertf128_si(resultlo, templo, 0);
- resultlo = _simd_insertf128_si(resultlo, temphi, 1);
-
- result = _simd16_insert_si(result, resultlo, 0);
-
- return _simd16_castsi_ps(result);
- }
-
- static simd16scalar unpack(const simd16scalar& in)
- {
- // input is 16 packed float16, output is 16 packed float32
- SWR_NOT_IMPL; // @todo
- return _simd16_setzero_ps();
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// TypeTraits - Format type traits specialization for FLOAT32
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct TypeTraits<SWR_TYPE_FLOAT, 32> : PackTraits<32>
-{
- static const SWR_TYPE MyType = SWR_TYPE_FLOAT;
- static float toFloat() { return 1.0f; }
- static float fromFloat() { return 1.0f; }
- static inline simdscalar convertSrgb(simdscalar& in)
- {
-#if KNOB_SIMD_WIDTH == 8
- __m128 srcLo = _mm256_extractf128_ps(in, 0);
- __m128 srcHi = _mm256_extractf128_ps(in, 1);
-
- srcLo = ConvertFloatToSRGB2(srcLo);
- srcHi = ConvertFloatToSRGB2(srcHi);
-
- in = _mm256_insertf128_ps(in, srcLo, 0);
- in = _mm256_insertf128_ps(in, srcHi, 1);
-#else
-#error Unsupported vector width
-#endif
- return in;
- }
-
- static inline simd16scalar convertSrgb(simd16scalar& in) { return ConvertFloatToSRGB2(in); }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FormatIntType - Calculate base integer type for pixel components based
-/// on total number of bits. Components can be smaller
-/// that this type, but the entire pixel must not be
-/// any smaller than this type.
-//////////////////////////////////////////////////////////////////////////
-template <uint32_t bits, bool bits8 = bits <= 8, bool bits16 = bits <= 16>
-struct FormatIntType
-{
- typedef uint32_t TYPE;
-};
-
-template <uint32_t bits>
-struct FormatIntType<bits, true, true>
-{
- typedef uint8_t TYPE;
-};
-
-template <uint32_t bits>
-struct FormatIntType<bits, false, true>
-{
- typedef uint16_t TYPE;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Format1 - Bitfield for single component formats.
-//////////////////////////////////////////////////////////////////////////
-template <uint32_t x>
-union Format1
-{
- typedef typename FormatIntType<x>::TYPE TYPE;
- struct
- {
- TYPE r : x;
- };
-
- ///@ The following are here to provide full template needed in Formats.
- struct
- {
- TYPE g : x;
- };
- struct
- {
- TYPE b : x;
- };
- struct
- {
- TYPE a : x;
- };
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Format2 - Bitfield for 2 component formats.
-//////////////////////////////////////////////////////////////////////////
-template <uint32_t x, uint32_t y>
-union Format2
-{
- typedef typename FormatIntType<x + y>::TYPE TYPE;
-
- struct
- {
- TYPE r : x;
- TYPE g : y;
- };
- struct
- {
- ///@ The following are here to provide full template needed in Formats.
- TYPE b : x;
- TYPE a : y;
- };
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Format3 - Bitfield for 3 component formats.
-//////////////////////////////////////////////////////////////////////////
-template <uint32_t x, uint32_t y, uint32_t z>
-union Format3
-{
- typedef typename FormatIntType<x + y + z>::TYPE TYPE;
-
- struct
- {
- TYPE r : x;
- TYPE g : y;
- TYPE b : z;
- };
- TYPE a; ///@note This is here to provide full template needed in Formats.
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Format4 - Bitfield for 4 component formats.
-//////////////////////////////////////////////////////////////////////////
-template <uint32_t x, uint32_t y, uint32_t z, uint32_t w>
-struct Format4
-{
- typedef typename FormatIntType<x + y + z + w>::TYPE TYPE;
-
- TYPE r : x;
- TYPE g : y;
- TYPE b : z;
- TYPE a : w;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// ComponentTraits - Default components
-//////////////////////////////////////////////////////////////////////////
-template <uint32_t x, uint32_t y, uint32_t z, uint32_t w>
-struct Defaults
-{
- INLINE static uint32_t GetDefault(uint32_t comp)
- {
- static const uint32_t defaults[4]{x, y, z, w};
- return defaults[comp];
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// ComponentTraits - Component type traits.
-//////////////////////////////////////////////////////////////////////////
-template <SWR_TYPE X,
- uint32_t NumBitsX,
- SWR_TYPE Y = SWR_TYPE_UNKNOWN,
- uint32_t NumBitsY = 0,
- SWR_TYPE Z = SWR_TYPE_UNKNOWN,
- uint32_t NumBitsZ = 0,
- SWR_TYPE W = SWR_TYPE_UNKNOWN,
- uint32_t NumBitsW = 0>
-struct ComponentTraits
-{
- INLINE static SWR_TYPE GetType(uint32_t comp)
- {
- static const SWR_TYPE CompType[4]{X, Y, Z, W};
- return CompType[comp];
- }
-
- INLINE static constexpr uint32_t GetConstBPC(uint32_t comp)
- {
- return (comp == 3) ? NumBitsW
- : ((comp == 2) ? NumBitsZ : ((comp == 1) ? NumBitsY : NumBitsX));
- }
-
- INLINE static uint32_t GetBPC(uint32_t comp)
- {
- static const uint32_t MyBpc[4]{NumBitsX, NumBitsY, NumBitsZ, NumBitsW};
- return MyBpc[comp];
- }
-
- INLINE static bool isNormalized(uint32_t comp)
- {
- switch (comp)
- {
- case 0:
- return (X == SWR_TYPE_UNORM || X == SWR_TYPE_SNORM) ? true : false;
- case 1:
- return (Y == SWR_TYPE_UNORM || Y == SWR_TYPE_SNORM) ? true : false;
- case 2:
- return (Z == SWR_TYPE_UNORM || Z == SWR_TYPE_SNORM) ? true : false;
- case 3:
- return (W == SWR_TYPE_UNORM || W == SWR_TYPE_SNORM) ? true : false;
- }
- SWR_INVALID("Invalid component: %d", comp);
- return false;
- }
-
- INLINE static float toFloat(uint32_t comp)
- {
- switch (comp)
- {
- case 0:
- return TypeTraits<X, NumBitsX>::toFloat();
- case 1:
- return TypeTraits<Y, NumBitsY>::toFloat();
- case 2:
- return TypeTraits<Z, NumBitsZ>::toFloat();
- case 3:
- return TypeTraits<W, NumBitsW>::toFloat();
- }
- SWR_INVALID("Invalid component: %d", comp);
- return TypeTraits<X, NumBitsX>::toFloat();
- }
-
- INLINE static float fromFloat(uint32_t comp)
- {
- switch (comp)
- {
- case 0:
- return TypeTraits<X, NumBitsX>::fromFloat();
- case 1:
- return TypeTraits<Y, NumBitsY>::fromFloat();
- case 2:
- return TypeTraits<Z, NumBitsZ>::fromFloat();
- case 3:
- return TypeTraits<W, NumBitsW>::fromFloat();
- }
- SWR_INVALID("Invalid component: %d", comp);
- return TypeTraits<X, NumBitsX>::fromFloat();
- }
-
- INLINE static void loadSOA(uint32_t comp, const uint8_t* pSrc, simdscalar& dst)
- {
- switch (comp)
- {
- case 0:
- dst = TypeTraits<X, NumBitsX>::loadSOA(pSrc);
- return;
- case 1:
- dst = TypeTraits<Y, NumBitsY>::loadSOA(pSrc);
- return;
- case 2:
- dst = TypeTraits<Z, NumBitsZ>::loadSOA(pSrc);
- return;
- case 3:
- dst = TypeTraits<W, NumBitsW>::loadSOA(pSrc);
- return;
- }
- SWR_INVALID("Invalid component: %d", comp);
- dst = TypeTraits<X, NumBitsX>::loadSOA(pSrc);
- }
-
- INLINE static void storeSOA(uint32_t comp, uint8_t* pDst, simdscalar const& src)
- {
- switch (comp)
- {
- case 0:
- TypeTraits<X, NumBitsX>::storeSOA(pDst, src);
- return;
- case 1:
- TypeTraits<Y, NumBitsY>::storeSOA(pDst, src);
- return;
- case 2:
- TypeTraits<Z, NumBitsZ>::storeSOA(pDst, src);
- return;
- case 3:
- TypeTraits<W, NumBitsW>::storeSOA(pDst, src);
- return;
- }
- SWR_INVALID("Invalid component: %d", comp);
- }
-
- INLINE static simdscalar unpack(uint32_t comp, simdscalar& in)
- {
- simdscalar out;
- switch (comp)
- {
- case 0:
- out = TypeTraits<X, NumBitsX>::unpack(in);
- break;
- case 1:
- out = TypeTraits<Y, NumBitsY>::unpack(in);
- break;
- case 2:
- out = TypeTraits<Z, NumBitsZ>::unpack(in);
- break;
- case 3:
- out = TypeTraits<W, NumBitsW>::unpack(in);
- break;
- default:
- SWR_INVALID("Invalid component: %d", comp);
- out = in;
- break;
- }
- return out;
- }
-
- INLINE static simdscalar pack(uint32_t comp, simdscalar& in)
- {
- simdscalar out;
- switch (comp)
- {
- case 0:
- out = TypeTraits<X, NumBitsX>::pack(in);
- break;
- case 1:
- out = TypeTraits<Y, NumBitsY>::pack(in);
- break;
- case 2:
- out = TypeTraits<Z, NumBitsZ>::pack(in);
- break;
- case 3:
- out = TypeTraits<W, NumBitsW>::pack(in);
- break;
- default:
- SWR_INVALID("Invalid component: %d", comp);
- out = in;
- break;
- }
- return out;
- }
-
- INLINE static simdscalar convertSrgb(uint32_t comp, simdscalar& in)
- {
- switch (comp)
- {
- case 0:
- return TypeTraits<X, NumBitsX>::convertSrgb(in);
- case 1:
- return TypeTraits<Y, NumBitsY>::convertSrgb(in);
- case 2:
- return TypeTraits<Z, NumBitsZ>::convertSrgb(in);
- case 3:
- return TypeTraits<W, NumBitsW>::convertSrgb(in);
- }
- SWR_INVALID("Invalid component: %d", comp);
- return TypeTraits<X, NumBitsX>::convertSrgb(in);
- }
-
- INLINE static void SIMDCALL loadSOA(uint32_t comp, const uint8_t* pSrc, simd16scalar& dst)
- {
- switch (comp)
- {
- case 0:
- dst = TypeTraits<X, NumBitsX>::loadSOA_16(pSrc);
- return;
- case 1:
- dst = TypeTraits<Y, NumBitsY>::loadSOA_16(pSrc);
- return;
- case 2:
- dst = TypeTraits<Z, NumBitsZ>::loadSOA_16(pSrc);
- return;
- case 3:
- dst = TypeTraits<W, NumBitsW>::loadSOA_16(pSrc);
- return;
- }
- SWR_INVALID("Invalid component: %d", comp);
- dst = TypeTraits<X, NumBitsX>::loadSOA_16(pSrc);
- }
-
- INLINE static void SIMDCALL storeSOA(uint32_t comp, uint8_t* pDst, simd16scalar const& src)
- {
- switch (comp)
- {
- case 0:
- TypeTraits<X, NumBitsX>::storeSOA(pDst, src);
- return;
- case 1:
- TypeTraits<Y, NumBitsY>::storeSOA(pDst, src);
- return;
- case 2:
- TypeTraits<Z, NumBitsZ>::storeSOA(pDst, src);
- return;
- case 3:
- TypeTraits<W, NumBitsW>::storeSOA(pDst, src);
- return;
- }
- SWR_INVALID("Invalid component: %d", comp);
- TypeTraits<X, NumBitsX>::storeSOA(pDst, src);
- }
-
- INLINE static simd16scalar unpack(uint32_t comp, simd16scalar& in)
- {
- switch (comp)
- {
- case 0:
- return TypeTraits<X, NumBitsX>::unpack(in);
- case 1:
- return TypeTraits<Y, NumBitsY>::unpack(in);
- case 2:
- return TypeTraits<Z, NumBitsZ>::unpack(in);
- case 3:
- return TypeTraits<W, NumBitsW>::unpack(in);
- }
- SWR_INVALID("Invalid component: %d", comp);
- return TypeTraits<X, NumBitsX>::unpack(in);
- }
-
- INLINE static simd16scalar pack(uint32_t comp, simd16scalar& in)
- {
- switch (comp)
- {
- case 0:
- return TypeTraits<X, NumBitsX>::pack(in);
- case 1:
- return TypeTraits<Y, NumBitsY>::pack(in);
- case 2:
- return TypeTraits<Z, NumBitsZ>::pack(in);
- case 3:
- return TypeTraits<W, NumBitsW>::pack(in);
- }
- SWR_INVALID("Invalid component: %d", comp);
- return TypeTraits<X, NumBitsX>::pack(in);
- }
-
- INLINE static simd16scalar convertSrgb(uint32_t comp, simd16scalar& in)
- {
- switch (comp)
- {
- case 0:
- return TypeTraits<X, NumBitsX>::convertSrgb(in);
- case 1:
- return TypeTraits<Y, NumBitsY>::convertSrgb(in);
- case 2:
- return TypeTraits<Z, NumBitsZ>::convertSrgb(in);
- case 3:
- return TypeTraits<W, NumBitsW>::convertSrgb(in);
- }
- SWR_INVALID("Invalid component: %d", comp);
- return TypeTraits<X, NumBitsX>::convertSrgb(in);
- }
-};
diff --git a/src/gallium/drivers/swr/rasterizer/core/format_utils.h b/src/gallium/drivers/swr/rasterizer/core/format_utils.h
deleted file mode 100644
index 7c0b62f1910..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/format_utils.h
+++ /dev/null
@@ -1,939 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file format_utils.h
- *
- * @brief Utilities used by SWR core related to pixel formats.
- *
- ******************************************************************************/
-#pragma once
-
-#include "core/utils.h"
-#include "common/simdintrin.h"
-
-INLINE // In-place 4x4 transpose of 32-bit elements (assuming SIMD128::unpack* behave like the SSE _mm_unpack* intrinsics).
-void vTranspose(simd4scalar& row0, simd4scalar& row1, simd4scalar& row2, simd4scalar& row3)
-{
- simd4scalari row0i = SIMD128::castps_si(row0);
- simd4scalari row1i = SIMD128::castps_si(row1);
- simd4scalari row2i = SIMD128::castps_si(row2);
- simd4scalari row3i = SIMD128::castps_si(row3);
- // Interleave the 32-bit elements of rows 2 and 3: low pairs into row2i, high pairs into vTemp.
- simd4scalari vTemp = row2i;
- row2i = SIMD128::unpacklo_epi32(row2i, row3i);
- vTemp = SIMD128::unpackhi_epi32(vTemp, row3i);
- // Same for rows 0 and 1: low pairs into row0i, high pairs into row3i (whose old value is no longer needed).
- row3i = row0i;
- row0i = SIMD128::unpacklo_epi32(row0i, row1i);
- row3i = SIMD128::unpackhi_epi32(row3i, row1i);
- // Combine the 64-bit halves of the low-pair vectors to form output rows 0 and 1.
- row1i = row0i;
- row0i = SIMD128::unpacklo_epi64(row0i, row2i);
- row1i = SIMD128::unpackhi_epi64(row1i, row2i);
- // Combine the 64-bit halves of the high-pair vectors to form output rows 2 and 3.
- row2i = row3i;
- row2i = SIMD128::unpacklo_epi64(row2i, vTemp);
- row3i = SIMD128::unpackhi_epi64(row3i, vTemp);
- // Write the results back through the float references.
- row0 = SIMD128::castsi_ps(row0i);
- row1 = SIMD128::castsi_ps(row1i);
- row2 = SIMD128::castsi_ps(row2i);
- row3 = SIMD128::castsi_ps(row3i);
-}
-
-INLINE // Integer-vector overload: in-place 4x4 transpose of 32-bit elements (assuming SSE-style unpack semantics).
-void vTranspose(simd4scalari& row0, simd4scalari& row1, simd4scalari& row2, simd4scalari& row3)
-{
- simd4scalari vTemp = row2; // keeps the original row2 for the high-pair interleave below
- row2 = SIMD128::unpacklo_epi32(row2, row3);
- vTemp = SIMD128::unpackhi_epi32(vTemp, row3);
- // Interleave rows 0 and 1: low pairs into row0, high pairs into row3 (whose old value is no longer needed).
- row3 = row0;
- row0 = SIMD128::unpacklo_epi32(row0, row1);
- row3 = SIMD128::unpackhi_epi32(row3, row1);
- // Combine the 64-bit halves of the low-pair vectors to form output rows 0 and 1.
- row1 = row0;
- row0 = SIMD128::unpacklo_epi64(row0, row2);
- row1 = SIMD128::unpackhi_epi64(row1, row2);
- // Combine the 64-bit halves of the high-pair vectors to form output rows 2 and 3.
- row2 = row3;
- row2 = SIMD128::unpacklo_epi64(row2, vTemp);
- row3 = SIMD128::unpackhi_epi64(row3, vTemp);
-}
-
-#if KNOB_SIMD_WIDTH == 8
-INLINE // Interleaves three 8-wide SOA vectors (x, y, z) into eight xyzw vectors, one per element, with w set to zero.
-void vTranspose3x8(simd4scalar (&vDst)[8],
- const simdscalar& vSrc0,
- const simdscalar& vSrc1,
- const simdscalar& vSrc2)
-{
- simdscalar r0r2 = _simd_unpacklo_ps(vSrc0, vSrc2); // x0z0x1z1 x4z4x5z5
- simdscalar r1rx = _simd_unpacklo_ps(vSrc1, _simd_setzero_ps()); // y0w0y1w1 y4w4y5w5 (every w is zero)
- simdscalar r02r1xlolo = _simd_unpacklo_ps(r0r2, r1rx); // x0y0z0w0 x4y4z4w4
- simdscalar r02r1xlohi = _simd_unpackhi_ps(r0r2, r1rx); // x1y1z1w1 x5y5z5w5
- // Repeat with the high element pairs of each 128-bit half.
- r0r2 = _simd_unpackhi_ps(vSrc0, vSrc2); // x2z2x3z3 x6z6x7z7
- r1rx = _simd_unpackhi_ps(vSrc1, _simd_setzero_ps()); // y2w2y3w3 y6w6y7w7
- simdscalar r02r1xhilo = _simd_unpacklo_ps(r0r2, r1rx); // x2y2z2w2 x6y6z6w6
- simdscalar r02r1xhihi = _simd_unpackhi_ps(r0r2, r1rx); // x3y3z3w3 x7y7z7w7
- // The low 128-bit halves carry elements 0-3.
- vDst[0] = _simd_extractf128_ps(r02r1xlolo, 0);
- vDst[1] = _simd_extractf128_ps(r02r1xlohi, 0);
- vDst[2] = _simd_extractf128_ps(r02r1xhilo, 0);
- vDst[3] = _simd_extractf128_ps(r02r1xhihi, 0);
- // The high 128-bit halves carry elements 4-7.
- vDst[4] = _simd_extractf128_ps(r02r1xlolo, 1);
- vDst[5] = _simd_extractf128_ps(r02r1xlohi, 1);
- vDst[6] = _simd_extractf128_ps(r02r1xhilo, 1);
- vDst[7] = _simd_extractf128_ps(r02r1xhihi, 1);
-}
-
-INLINE // Interleaves four 8-wide SOA vectors (x, y, z, w) into eight xyzw vectors, one per element.
-void vTranspose4x8(simd4scalar (&vDst)[8],
- const simdscalar& vSrc0,
- const simdscalar& vSrc1,
- const simdscalar& vSrc2,
- const simdscalar& vSrc3)
-{
- simdscalar r0r2 = _simd_unpacklo_ps(vSrc0, vSrc2); // x0z0x1z1 x4z4x5z5
- simdscalar r1rx = _simd_unpacklo_ps(vSrc1, vSrc3); // y0w0y1w1 y4w4y5w5
- simdscalar r02r1xlolo = _simd_unpacklo_ps(r0r2, r1rx); // x0y0z0w0 x4y4z4w4
- simdscalar r02r1xlohi = _simd_unpackhi_ps(r0r2, r1rx); // x1y1z1w1 x5y5z5w5
- // Repeat with the high element pairs of each 128-bit half.
- r0r2 = _simd_unpackhi_ps(vSrc0, vSrc2); // x2z2x3z3 x6z6x7z7
- r1rx = _simd_unpackhi_ps(vSrc1, vSrc3); // y2w2y3w3 y6w6y7w7
- simdscalar r02r1xhilo = _simd_unpacklo_ps(r0r2, r1rx); // x2y2z2w2 x6y6z6w6
- simdscalar r02r1xhihi = _simd_unpackhi_ps(r0r2, r1rx); // x3y3z3w3 x7y7z7w7
- // The low 128-bit halves carry elements 0-3.
- vDst[0] = _simd_extractf128_ps(r02r1xlolo, 0);
- vDst[1] = _simd_extractf128_ps(r02r1xlohi, 0);
- vDst[2] = _simd_extractf128_ps(r02r1xhilo, 0);
- vDst[3] = _simd_extractf128_ps(r02r1xhihi, 0);
- // The high 128-bit halves carry elements 4-7.
- vDst[4] = _simd_extractf128_ps(r02r1xlolo, 1);
- vDst[5] = _simd_extractf128_ps(r02r1xlohi, 1);
- vDst[6] = _simd_extractf128_ps(r02r1xhilo, 1);
- vDst[7] = _simd_extractf128_ps(r02r1xhihi, 1);
-}
-
-INLINE // Interleaves four 16-wide SOA vectors (labelled r, g, b, a below) into four 16-float AOS vectors.
-void vTranspose4x16(simd16scalar (&dst)[4],
- const simd16scalar& src0,
- const simd16scalar& src1,
- const simd16scalar& src2,
- const simd16scalar& src3)
-{
- const simd16scalari perm =
- _simd16_set_epi32(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0);
- // NOTE(review): presumably _simd16_set_epi32 lists the highest element first, as _mm512_set_epi32 does - confirm.
- // pre-permute input to setup the right order after all the unpacking
- // NOTE(review): with AVX-512-style semantics each 128-bit lane k then holds elements k, k+4, k+8, k+12 - confirm.
- simd16scalar pre0 = _simd16_permute_ps(src0, perm); // r
- simd16scalar pre1 = _simd16_permute_ps(src1, perm); // g
- simd16scalar pre2 = _simd16_permute_ps(src2, perm); // b
- simd16scalar pre3 = _simd16_permute_ps(src3, perm); // a
- // Pair r with b and g with a so that the second unpack below yields r, g, b, a order.
- simd16scalar rblo = _simd16_unpacklo_ps(pre0, pre2);
- simd16scalar galo = _simd16_unpacklo_ps(pre1, pre3);
- simd16scalar rbhi = _simd16_unpackhi_ps(pre0, pre2);
- simd16scalar gahi = _simd16_unpackhi_ps(pre1, pre3);
- // Under the assumptions above, dst[k] ends up holding rgba for elements 4k .. 4k+3.
- dst[0] = _simd16_unpacklo_ps(rblo, galo);
- dst[1] = _simd16_unpackhi_ps(rblo, galo);
- dst[2] = _simd16_unpacklo_ps(rbhi, gahi);
- dst[3] = _simd16_unpackhi_ps(rbhi, gahi);
-}
-
-INLINE // 8x8 transpose of 32-bit floats: vDst[i] receives element i of each of vMask0..vMask7 (assuming AVX semantics).
-void vTranspose8x8(simdscalar (&vDst)[8],
- const simdscalar& vMask0,
- const simdscalar& vMask1,
- const simdscalar& vMask2,
- const simdscalar& vMask3,
- const simdscalar& vMask4,
- const simdscalar& vMask5,
- const simdscalar& vMask6,
- const simdscalar& vMask7)
-{ // Locals avoid the double-underscore prefix, which is reserved for the implementation.
- simdscalar t0 = _simd_unpacklo_ps(vMask0, vMask1); // step 1: interleave adjacent row pairs
- simdscalar t1 = _simd_unpackhi_ps(vMask0, vMask1);
- simdscalar t2 = _simd_unpacklo_ps(vMask2, vMask3);
- simdscalar t3 = _simd_unpackhi_ps(vMask2, vMask3);
- simdscalar t4 = _simd_unpacklo_ps(vMask4, vMask5);
- simdscalar t5 = _simd_unpackhi_ps(vMask4, vMask5);
- simdscalar t6 = _simd_unpacklo_ps(vMask6, vMask7);
- simdscalar t7 = _simd_unpackhi_ps(vMask6, vMask7);
- simdscalar tt0 = _simd_shuffle_ps(t0, t2, _MM_SHUFFLE(1, 0, 1, 0)); // step 2: gather four rows per 128-bit half
- simdscalar tt1 = _simd_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 2, 3, 2));
- simdscalar tt2 = _simd_shuffle_ps(t1, t3, _MM_SHUFFLE(1, 0, 1, 0));
- simdscalar tt3 = _simd_shuffle_ps(t1, t3, _MM_SHUFFLE(3, 2, 3, 2));
- simdscalar tt4 = _simd_shuffle_ps(t4, t6, _MM_SHUFFLE(1, 0, 1, 0));
- simdscalar tt5 = _simd_shuffle_ps(t4, t6, _MM_SHUFFLE(3, 2, 3, 2));
- simdscalar tt6 = _simd_shuffle_ps(t5, t7, _MM_SHUFFLE(1, 0, 1, 0));
- simdscalar tt7 = _simd_shuffle_ps(t5, t7, _MM_SHUFFLE(3, 2, 3, 2));
- vDst[0] = _simd_permute2f128_ps(tt0, tt4, 0x20); // step 3: 0x20 joins the low halves (outputs 0-3)
- vDst[1] = _simd_permute2f128_ps(tt1, tt5, 0x20);
- vDst[2] = _simd_permute2f128_ps(tt2, tt6, 0x20);
- vDst[3] = _simd_permute2f128_ps(tt3, tt7, 0x20);
- vDst[4] = _simd_permute2f128_ps(tt0, tt4, 0x31); // 0x31 joins the high halves (outputs 4-7)
- vDst[5] = _simd_permute2f128_ps(tt1, tt5, 0x31);
- vDst[6] = _simd_permute2f128_ps(tt2, tt6, 0x31);
- vDst[7] = _simd_permute2f128_ps(tt3, tt7, 0x31);
-}
-
-INLINE // Integer-typed overload: reinterprets each input row as float bits and forwards to the float overload.
-void vTranspose8x8(simdscalar (&vDst)[8],
- const simdscalari& vMask0,
- const simdscalari& vMask1,
- const simdscalari& vMask2,
- const simdscalari& vMask3,
- const simdscalari& vMask4,
- const simdscalari& vMask5,
- const simdscalari& vMask6,
- const simdscalari& vMask7)
-{
- const simdscalar row0 = _simd_castsi_ps(vMask0);
- const simdscalar row1 = _simd_castsi_ps(vMask1);
- const simdscalar row2 = _simd_castsi_ps(vMask2);
- const simdscalar row3 = _simd_castsi_ps(vMask3);
- const simdscalar row4 = _simd_castsi_ps(vMask4);
- const simdscalar row5 = _simd_castsi_ps(vMask5);
- const simdscalar row6 = _simd_castsi_ps(vMask6);
- const simdscalar row7 = _simd_castsi_ps(vMask7);
- vTranspose8x8(vDst, row0, row1, row2, row3, row4, row5, row6, row7);
-}
-#endif
-
-//////////////////////////////////////////////////////////////////////////
-/// TransposeSingleComponent
-//////////////////////////////////////////////////////////////////////////
-template <uint32_t bpp>
-struct TransposeSingleComponent
-{
- /// @brief Single-component pass-thru: copies one SIMD-wide row of bpp-bit elements unchanged.
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
- {
- const auto rowBytes = (bpp * KNOB_SIMD_WIDTH) / 8;
- memcpy(pDst, pSrc, rowBytes);
- }
-
- INLINE static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst)
- {
- const auto rowBytes = (bpp * KNOB_SIMD16_WIDTH) / 8;
- memcpy(pDst, pSrc, rowBytes);
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose8_8_8_8
-//////////////////////////////////////////////////////////////////////////
-struct Transpose8_8_8_8
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 8_8_8_8 data (8-wide: 8 bytes per component in, 8 rgba pixels out).
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
- {
- simdscalari src = _simd_load_si((const simdscalari*)pSrc);
- // Two implementations: 128-bit unpacks for KNOB_ARCH <= KNOB_ARCH_AVX, byte shuffles otherwise.
-#if KNOB_SIMD_WIDTH == 8
-#if KNOB_ARCH <= KNOB_ARCH_AVX
- simd4scalari c0c1 = src.v4[0]; // rrrrrrrrgggggggg
- simd4scalari c2c3 =
- SIMD128::castps_si(_simd_extractf128_ps(_simd_castsi_ps(src), 1)); // bbbbbbbbaaaaaaaa
- simd4scalari c0c2 = SIMD128::unpacklo_epi64(c0c1, c2c3); // rrrrrrrrbbbbbbbb
- simd4scalari c1c3 = SIMD128::unpackhi_epi64(c0c1, c2c3); // ggggggggaaaaaaaa
- simd4scalari c01 = SIMD128::unpacklo_epi8(c0c2, c1c3); // rgrgrgrgrgrgrgrg
- simd4scalari c23 = SIMD128::unpackhi_epi8(c0c2, c1c3); // babababababababa
- simd4scalari c0123lo = SIMD128::unpacklo_epi16(c01, c23); // rgbargbargbargba
- simd4scalari c0123hi = SIMD128::unpackhi_epi16(c01, c23); // rgbargbargbargba
- SIMD128::store_si((simd4scalari*)pDst, c0123lo);
- SIMD128::store_si((simd4scalari*)(pDst + 16), c0123hi);
-#else
- simdscalari dst01 = _simd_shuffle_epi8(src, // NOTE(review): assuming vpshufb semantics (per 128-bit lane, 0x80 -> zero byte)
- _simd_set_epi32(0x0f078080,
- 0x0e068080,
- 0x0d058080,
- 0x0c048080,
- 0x80800b03,
- 0x80800a02,
- 0x80800901,
- 0x80800800));
- simdscalari dst23 = _mm256_permute2x128_si256(src, src, 0x01); // swap the two 128-bit halves of src
- dst23 = _simd_shuffle_epi8(dst23, // complementary mask: fills the byte positions dst01 left as zero
- _simd_set_epi32(0x80800f07,
- 0x80800e06,
- 0x80800d05,
- 0x80800c04,
- 0x0b038080,
- 0x0a028080,
- 0x09018080,
- 0x08008080));
- simdscalari dst = _simd_or_si(dst01, dst23);
- _simd_store_si((simdscalari*)pDst, dst);
-#endif
-#else
-#error Unsupported vector width
-#endif
- }
- /// @brief 16-wide variant: reads 16 bytes per component and writes 16 packed 32-bit rgba pixels.
- INLINE static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD16_WIDTH == 16
- // clang-format off
- // One 128-bit load per component plane.
- simd4scalari src0 = SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc)); // rrrrrrrrrrrrrrrr
- simd4scalari src1 = SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc) + 1); // gggggggggggggggg
- simd4scalari src2 = SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc) + 2); // bbbbbbbbbbbbbbbb
- simd4scalari src3 = SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc) + 3); // aaaaaaaaaaaaaaaa
- // Widen every byte to its own 32-bit element (zero-extension, going by the cvtepu8 name).
- simd16scalari cvt0 = _simd16_cvtepu8_epi32(src0);
- simd16scalari cvt1 = _simd16_cvtepu8_epi32(src1);
- simd16scalari cvt2 = _simd16_cvtepu8_epi32(src2);
- simd16scalari cvt3 = _simd16_cvtepu8_epi32(src3);
- // Move g, b and a to bits 8, 16 and 24 of their element; r stays in the low byte.
- simd16scalari shl1 = _simd16_slli_epi32(cvt1, 8);
- simd16scalari shl2 = _simd16_slli_epi32(cvt2, 16);
- simd16scalari shl3 = _simd16_slli_epi32(cvt3, 24);
- // Merge the four planes so each 32-bit element is r | g << 8 | b << 16 | a << 24.
- simd16scalari dst = _simd16_or_si(_simd16_or_si(cvt0, shl1), _simd16_or_si(shl2, shl3));
- // Single 16-element store of the packed pixels.
- _simd16_store_si(reinterpret_cast<simd16scalari*>(pDst), dst); // rgbargbargbargbargbargbargbargbargbargbargbargbargbargbargbargba
-
- // clang-format on
-#else
-#error Unsupported vector width
-#endif
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose8_8_8
-//////////////////////////////////////////////////////////////////////////
-struct Transpose8_8_8
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief SOA to AOS conversion for packed 8_8_8 data; both entry points are deleted, so any use fails to compile.
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
- INLINE static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst) = delete;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose8_8
-//////////////////////////////////////////////////////////////////////////
-struct Transpose8_8
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 8_8 data (8-wide: 8 r bytes + 8 g bytes in, 8 rg pairs out).
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD_WIDTH == 8
- simdscalari src = _simd_load_si((const simdscalari*)pSrc);
- // NOTE(review): a full simdscalari is loaded but only src.v4[0] is used below.
- simd4scalari rg = src.v4[0]; // rrrrrrrr gggggggg
- simd4scalari g = SIMD128::unpackhi_epi64(rg, rg); // gggggggg gggggggg
- rg = SIMD128::unpacklo_epi8(rg, g); // rgrgrgrgrgrgrgrg
- SIMD128::store_si((simd4scalari*)pDst, rg);
-#else
-#error Unsupported vector width
-#endif
- }
- /// @brief 16-wide variant: reads 16 r bytes and 16 g bytes, writes 16 rg pairs.
- INLINE static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD16_WIDTH == 16
- // clang-format off
- // One 128-bit load per component plane.
- simd4scalari src0 = SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc)); // rrrrrrrrrrrrrrrr
- simd4scalari src1 = SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc) + 1); // gggggggggggggggg
- // Widen every byte to its own 16-bit element (zero-extension, going by the cvtepu8 name).
- simdscalari cvt0 = _simd_cvtepu8_epi16(src0);
- simdscalari cvt1 = _simd_cvtepu8_epi16(src1);
- // 32-bit shift on 16-bit data: presumably safe because each element's upper byte is zero after widening.
- simdscalari shl1 = _simd_slli_epi32(cvt1, 8);
- // Each 16-bit element becomes r | g << 8.
- simdscalari dst = _simd_or_si(cvt0, shl1);
- // Single store of the 16 packed pairs.
- _simd_store_si(reinterpret_cast<simdscalari*>(pDst), dst); // rgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrg
-
- // clang-format on
-#else
-#error Unsupported vector width
-#endif
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose32_32_32_32
-//////////////////////////////////////////////////////////////////////////
-struct Transpose32_32_32_32
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 32_32_32_32 data (8-wide: 4 planes of 8 floats in, 8 four-float pixels out).
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD_WIDTH == 8
- simdscalar src0 = _simd_load_ps((const float*)pSrc);
- simdscalar src1 = _simd_load_ps((const float*)pSrc + 8);
- simdscalar src2 = _simd_load_ps((const float*)pSrc + 16);
- simdscalar src3 = _simd_load_ps((const float*)pSrc + 24);
- // vTranspose4x8 produces one 4-float vector per element; store them back to back.
- simd4scalar vDst[8];
- vTranspose4x8(vDst, src0, src1, src2, src3);
- SIMD128::store_ps((float*)pDst, vDst[0]);
- SIMD128::store_ps((float*)pDst + 4, vDst[1]);
- SIMD128::store_ps((float*)pDst + 8, vDst[2]);
- SIMD128::store_ps((float*)pDst + 12, vDst[3]);
- SIMD128::store_ps((float*)pDst + 16, vDst[4]);
- SIMD128::store_ps((float*)pDst + 20, vDst[5]);
- SIMD128::store_ps((float*)pDst + 24, vDst[6]);
- SIMD128::store_ps((float*)pDst + 28, vDst[7]);
-#else
-#error Unsupported vector width
-#endif
- }
- /// @brief 16-wide variant: reads 4 planes of 16 floats and writes 64 floats via vTranspose4x16.
- INLINE static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD16_WIDTH == 16
- // clang-format off
- // One 16-float load per component plane.
- simd16scalar src0 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc));
- simd16scalar src1 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 16);
- simd16scalar src2 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 32);
- simd16scalar src3 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 48);
- // Receives the interleaved output of vTranspose4x16.
- simd16scalar dst[4];
- // Interleave the four planes.
- vTranspose4x16(dst, src0, src1, src2, src3);
- // Store the four result vectors contiguously.
- _simd16_store_ps(reinterpret_cast<float*>(pDst) + 0, dst[0]);
- _simd16_store_ps(reinterpret_cast<float*>(pDst) + 16, dst[1]);
- _simd16_store_ps(reinterpret_cast<float*>(pDst) + 32, dst[2]);
- _simd16_store_ps(reinterpret_cast<float*>(pDst) + 48, dst[3]);
-
- // clang-format on
-#else
-#error Unsupported vector width
-#endif
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose32_32_32
-//////////////////////////////////////////////////////////////////////////
-struct Transpose32_32_32
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 32_32_32 data; output pixels are 4 floats wide with a zero 4th component.
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD_WIDTH == 8
- simdscalar src0 = _simd_load_ps((const float*)pSrc);
- simdscalar src1 = _simd_load_ps((const float*)pSrc + 8);
- simdscalar src2 = _simd_load_ps((const float*)pSrc + 16);
- // vTranspose3x8 supplies zero for the missing 4th component, so 32 floats are written for 24 read.
- simd4scalar vDst[8];
- vTranspose3x8(vDst, src0, src1, src2);
- SIMD128::store_ps((float*)pDst, vDst[0]);
- SIMD128::store_ps((float*)pDst + 4, vDst[1]);
- SIMD128::store_ps((float*)pDst + 8, vDst[2]);
- SIMD128::store_ps((float*)pDst + 12, vDst[3]);
- SIMD128::store_ps((float*)pDst + 16, vDst[4]);
- SIMD128::store_ps((float*)pDst + 20, vDst[5]);
- SIMD128::store_ps((float*)pDst + 24, vDst[6]);
- SIMD128::store_ps((float*)pDst + 28, vDst[7]);
-#else
-#error Unsupported vector width
-#endif
- }
- /// @brief 16-wide variant: reads 3 planes of 16 floats and writes 64 floats (4th component zero).
- INLINE static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD16_WIDTH == 16
- // clang-format off
- // Three planes come from memory; the fourth is a zero vector.
- simd16scalar src0 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc));
- simd16scalar src1 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 16);
- simd16scalar src2 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 32);
- simd16scalar src3 = _simd16_setzero_ps();
- // Receives the interleaved output of vTranspose4x16.
- simd16scalar dst[4];
- // Interleave the three planes plus the zero plane.
- vTranspose4x16(dst, src0, src1, src2, src3);
- // Store the four result vectors contiguously.
- _simd16_store_ps(reinterpret_cast<float*>(pDst) + 0, dst[0]);
- _simd16_store_ps(reinterpret_cast<float*>(pDst) + 16, dst[1]);
- _simd16_store_ps(reinterpret_cast<float*>(pDst) + 32, dst[2]);
- _simd16_store_ps(reinterpret_cast<float*>(pDst) + 48, dst[3]);
-
- // clang-format on
-#else
-#error Unsupported vector width
-#endif
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose32_32
-//////////////////////////////////////////////////////////////////////////
-struct Transpose32_32
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 32_32 data (8-wide: 8 r floats + 8 g floats in, 8 rg pairs out).
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD_WIDTH == 8
- const float* pfSrc = (const float*)pSrc;
- simd4scalar src_r0 = SIMD128::load_ps(pfSrc + 0);
- simd4scalar src_r1 = SIMD128::load_ps(pfSrc + 4);
- simd4scalar src_g0 = SIMD128::load_ps(pfSrc + 8);
- simd4scalar src_g1 = SIMD128::load_ps(pfSrc + 12);
- // Interleave r with g, four floats at a time (assuming SSE-style unpack semantics).
- simd4scalar dst0 = SIMD128::unpacklo_ps(src_r0, src_g0);
- simd4scalar dst1 = SIMD128::unpackhi_ps(src_r0, src_g0);
- simd4scalar dst2 = SIMD128::unpacklo_ps(src_r1, src_g1);
- simd4scalar dst3 = SIMD128::unpackhi_ps(src_r1, src_g1);
- // Store the 16 interleaved floats contiguously.
- float* pfDst = (float*)pDst;
- SIMD128::store_ps(pfDst + 0, dst0);
- SIMD128::store_ps(pfDst + 4, dst1);
- SIMD128::store_ps(pfDst + 8, dst2);
- SIMD128::store_ps(pfDst + 12, dst3);
-#else
-#error Unsupported vector width
-#endif
- }
- /// @brief 16-wide variant: reads 16 r floats and 16 g floats, writes 16 rg pairs.
- INLINE static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD16_WIDTH == 16
- // clang-format off
- // One 16-float load per component plane.
- simd16scalar src0 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc)); // rrrrrrrrrrrrrrrr
- simd16scalar src1 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 16); // gggggggggggggggg
- // Interleave r and g within each 128-bit lane.
- simd16scalar tmp0 = _simd16_unpacklo_ps(src0, src1); // r0 g0 r1 g1 r4 g4 r5 g5 r8 g8 r9 g9 rC gC rD gD
- simd16scalar tmp1 = _simd16_unpackhi_ps(src0, src1); // r2 g2 r3 g3 r6 g6 r7 g7 rA gA rB gB rE gE rF gF
- // Gather the lanes for elements 0-7 into per0 and those for elements 8-F into per1.
- simd16scalar per0 = _simd16_permute2f128_ps(tmp0, tmp1, 0x44); // (1, 0, 1, 0) // r0 g0 r1 g1 r4 g4 r5 g5 r2 g2 r3 g3 r6 g6 r7 g7
- simd16scalar per1 = _simd16_permute2f128_ps(tmp0, tmp1, 0xEE); // (3, 2, 3, 2) // r8 g8 r9 g9 rC gC rD gD rA gA rB gB rE gE rF gF
- // Swap the two middle lanes to put the elements in ascending order.
- simd16scalar dst0 = _simd16_permute2f128_ps(per0, per0, 0xD8); // (3, 1, 2, 0) // r0 g0 r1 g1 r2 g2 r3 g3 r4 g4 r5 g5 r6 g6 r7 g7
- simd16scalar dst1 = _simd16_permute2f128_ps(per1, per1, 0xD8); // (3, 1, 2, 0) // r8 g8 r9 g9 rA gA rB gB rC gC rD gD rE gE rF gF
- // Store the 32 interleaved floats contiguously.
- _simd16_store_ps(reinterpret_cast<float*>(pDst) + 0, dst0); // rgrgrgrgrgrgrgrg
- _simd16_store_ps(reinterpret_cast<float*>(pDst) + 16, dst1); // rgrgrgrgrgrgrgrg
-
- // clang-format on
-#else
-#error Unsupported vector width
-#endif
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose16_16_16_16
-//////////////////////////////////////////////////////////////////////////
-struct Transpose16_16_16_16
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 16_16_16_16 data (8-wide: two simdscalari loads in, four 128-bit stores out).
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD_WIDTH == 8
- simdscalari src_rg = _simd_load_si((const simdscalari*)pSrc);
- simdscalari src_ba = _simd_load_si((const simdscalari*)(pSrc + sizeof(simdscalari)));
- // Split the two loads into one 128-bit vector per component.
- simd4scalari src_r = _simd_extractf128_si(src_rg, 0);
- simd4scalari src_g = _simd_extractf128_si(src_rg, 1);
- simd4scalari src_b = _simd_extractf128_si(src_ba, 0);
- simd4scalari src_a = _simd_extractf128_si(src_ba, 1);
- // Interleave 16-bit elements into rg and ba pairs (assuming SSE-style unpack semantics).
- simd4scalari rg0 = SIMD128::unpacklo_epi16(src_r, src_g);
- simd4scalari rg1 = SIMD128::unpackhi_epi16(src_r, src_g);
- simd4scalari ba0 = SIMD128::unpacklo_epi16(src_b, src_a);
- simd4scalari ba1 = SIMD128::unpackhi_epi16(src_b, src_a);
- // Interleave the 32-bit rg and ba pairs into full rgba pixels.
- simd4scalari dst0 = SIMD128::unpacklo_epi32(rg0, ba0);
- simd4scalari dst1 = SIMD128::unpackhi_epi32(rg0, ba0);
- simd4scalari dst2 = SIMD128::unpacklo_epi32(rg1, ba1);
- simd4scalari dst3 = SIMD128::unpackhi_epi32(rg1, ba1);
- // Store the four result vectors contiguously.
- SIMD128::store_si(((simd4scalari*)pDst) + 0, dst0);
- SIMD128::store_si(((simd4scalari*)pDst) + 1, dst1);
- SIMD128::store_si(((simd4scalari*)pDst) + 2, dst2);
- SIMD128::store_si(((simd4scalari*)pDst) + 3, dst3);
-#else
-#error Unsupported vector width
-#endif
- }
- /// @brief 16-wide variant: reads four simdscalari planes of 16-bit elements and writes 16 rgba pixels.
- INLINE static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD16_WIDTH == 16
- // clang-format off
- // One simdscalari load per component plane.
- simdscalari src0 = _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc)); // rrrrrrrrrrrrrrrr
- simdscalari src1 = _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 1); // gggggggggggggggg
- simdscalari src2 = _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 2); // bbbbbbbbbbbbbbbb
- simdscalari src3 = _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 3); // aaaaaaaaaaaaaaaa
- // Interleave 16-bit elements into rg and ba pairs, per 128-bit lane.
- simdscalari pre0 = _simd_unpacklo_epi16(src0, src1); // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
- simdscalari pre1 = _simd_unpackhi_epi16(src0, src1); // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
- simdscalari pre2 = _simd_unpacklo_epi16(src2, src3); // ba0 ba1 ba2 ba3 ba8 ba9 baA baB
- simdscalari pre3 = _simd_unpackhi_epi16(src2, src3); // ba4 ba5 ba6 ba7 baC baD baE baF
- // Interleave the 32-bit rg and ba pairs into full rgba pixels.
- simdscalari tmp0 = _simd_unpacklo_epi32(pre0, pre2); // rgba0 rgba1 rgba8 rgba9
- simdscalari tmp1 = _simd_unpackhi_epi32(pre0, pre2); // rgba2 rgba3 rgbaA rgbaB
- simdscalari tmp2 = _simd_unpacklo_epi32(pre1, pre3); // rgba4 rgba5 rgbaC rgbaD
- simdscalari tmp3 = _simd_unpackhi_epi32(pre1, pre3); // rgba6 rgba7 rgbaE rgbaF
- // Recombine 128-bit halves so the pixels come out in ascending order.
- simdscalari dst0 = _simd_permute2f128_si(tmp0, tmp1, 0x20); // (2, 0) // rgba0 rgba1 rgba2 rgba3
- simdscalari dst1 = _simd_permute2f128_si(tmp2, tmp3, 0x20); // (2, 0) // rgba4 rgba5 rgba6 rgba7
- simdscalari dst2 = _simd_permute2f128_si(tmp0, tmp1, 0x31); // (3, 1) // rgba8 rgba9 rgbaA rgbaB
- simdscalari dst3 = _simd_permute2f128_si(tmp2, tmp3, 0x31); // (3, 1) // rgbaC rgbaD rgbaE rgbaF
- // Store the four result vectors contiguously.
- _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 0, dst0); // rgbargbargbargba
- _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 1, dst1); // rgbargbargbargba
- _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 2, dst2); // rgbargbargbargba
- _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 3, dst3); // rgbargbargbargba
-
- // clang-format on
-#else
-#error Unsupported vector width
-#endif
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose16_16_16
-//////////////////////////////////////////////////////////////////////////
-struct Transpose16_16_16
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 16_16_16 data; output pixels are 4 elements wide with a zero 4th component.
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD_WIDTH == 8
- simdscalari src_rg = _simd_load_si((const simdscalari*)pSrc);
- // r and g come from the two halves of the first load; b is loaded separately and a is all zero.
- simd4scalari src_r = _simd_extractf128_si(src_rg, 0);
- simd4scalari src_g = _simd_extractf128_si(src_rg, 1);
- simd4scalari src_b = SIMD128::load_si((const simd4scalari*)(pSrc + sizeof(simdscalari)));
- simd4scalari src_a = SIMD128::setzero_si();
- // Interleave 16-bit elements into rg and ba pairs (assuming SSE-style unpack semantics).
- simd4scalari rg0 = SIMD128::unpacklo_epi16(src_r, src_g);
- simd4scalari rg1 = SIMD128::unpackhi_epi16(src_r, src_g);
- simd4scalari ba0 = SIMD128::unpacklo_epi16(src_b, src_a);
- simd4scalari ba1 = SIMD128::unpackhi_epi16(src_b, src_a);
- // Interleave the 32-bit rg and ba pairs into full 4-element pixels.
- simd4scalari dst0 = SIMD128::unpacklo_epi32(rg0, ba0);
- simd4scalari dst1 = SIMD128::unpackhi_epi32(rg0, ba0);
- simd4scalari dst2 = SIMD128::unpacklo_epi32(rg1, ba1);
- simd4scalari dst3 = SIMD128::unpackhi_epi32(rg1, ba1);
- // Store the four result vectors contiguously.
- SIMD128::store_si(((simd4scalari*)pDst) + 0, dst0);
- SIMD128::store_si(((simd4scalari*)pDst) + 1, dst1);
- SIMD128::store_si(((simd4scalari*)pDst) + 2, dst2);
- SIMD128::store_si(((simd4scalari*)pDst) + 3, dst3);
-#else
-#error Unsupported vector width
-#endif
- }
- /// @brief 16-wide variant: reads three simdscalari planes of 16-bit elements and writes 16 four-element pixels (4th zero).
- INLINE static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD16_WIDTH == 16
- // clang-format off
- // Three planes come from memory; the fourth is a zero vector.
- simdscalari src0 = _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc)); // rrrrrrrrrrrrrrrr
- simdscalari src1 = _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 1); // gggggggggggggggg
- simdscalari src2 = _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 2); // bbbbbbbbbbbbbbbb
- simdscalari src3 = _simd_setzero_si(); // a lanes are all zero
- // Interleave 16-bit elements into rg and ba pairs, per 128-bit lane.
- simdscalari pre0 = _simd_unpacklo_epi16(src0, src1); // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
- simdscalari pre1 = _simd_unpackhi_epi16(src0, src1); // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
- simdscalari pre2 = _simd_unpacklo_epi16(src2, src3); // ba0 ba1 ba2 ba3 ba8 ba9 baA baB
- simdscalari pre3 = _simd_unpackhi_epi16(src2, src3); // ba4 ba5 ba6 ba7 baC baD baE baF
- // Interleave the 32-bit rg and ba pairs into full 4-element pixels.
- simdscalari tmp0 = _simd_unpacklo_epi32(pre0, pre2); // rgba0 rgba1 rgba8 rgba9
- simdscalari tmp1 = _simd_unpackhi_epi32(pre0, pre2); // rgba2 rgba3 rgbaA rgbaB
- simdscalari tmp2 = _simd_unpacklo_epi32(pre1, pre3); // rgba4 rgba5 rgbaC rgbaD
- simdscalari tmp3 = _simd_unpackhi_epi32(pre1, pre3); // rgba6 rgba7 rgbaE rgbaF
- // Recombine 128-bit halves so the pixels come out in ascending order.
- simdscalari dst0 = _simd_permute2f128_si(tmp0, tmp1, 0x20); // (2, 0) // rgba0 rgba1 rgba2 rgba3
- simdscalari dst1 = _simd_permute2f128_si(tmp2, tmp3, 0x20); // (2, 0) // rgba4 rgba5 rgba6 rgba7
- simdscalari dst2 = _simd_permute2f128_si(tmp0, tmp1, 0x31); // (3, 1) // rgba8 rgba9 rgbaA rgbaB
- simdscalari dst3 = _simd_permute2f128_si(tmp2, tmp3, 0x31); // (3, 1) // rgbaC rgbaD rgbaE rgbaF
- // Store the four result vectors contiguously.
- _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 0, dst0); // rgbargbargbargba
- _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 1, dst1); // rgbargbargbargba
- _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 2, dst2); // rgbargbargbargba
- _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 3, dst3); // rgbargbargbargba
-
- // clang-format on
-#else
-#error Unsupported vector width
-#endif
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose16_16
-//////////////////////////////////////////////////////////////////////////
-struct Transpose16_16
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 16_16 data (8-wide: one simdscalar load in, two 128-bit stores out).
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD_WIDTH == 8
- simdscalar src = _simd_load_ps((const float*)pSrc);
- // The low 128-bit half is the first component plane, the high half the second.
- simd4scalar comp0 = _simd_extractf128_ps(src, 0);
- simd4scalar comp1 = _simd_extractf128_ps(src, 1);
- // Reinterpret as integer vectors so the 16-bit unpacks can be used.
- simd4scalari comp0i = SIMD128::castps_si(comp0);
- simd4scalari comp1i = SIMD128::castps_si(comp1);
- // Interleave the 16-bit elements of the two components (assuming SSE-style unpack semantics).
- simd4scalari resLo = SIMD128::unpacklo_epi16(comp0i, comp1i);
- simd4scalari resHi = SIMD128::unpackhi_epi16(comp0i, comp1i);
- // Store the two result vectors contiguously.
- SIMD128::store_si((simd4scalari*)pDst, resLo);
- SIMD128::store_si((simd4scalari*)pDst + 1, resHi);
-#else
-#error Unsupported vector width
-#endif
- }
- /// @brief 16-wide variant: reads two simdscalari planes of 16-bit elements and writes 16 rg pairs.
- INLINE static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst)
- {
-#if KNOB_SIMD16_WIDTH == 16
- // clang-format off
- // One simdscalari load per component plane.
- simdscalari src0 = _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc)); // rrrrrrrrrrrrrrrr
- simdscalari src1 = _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 1); // gggggggggggggggg
- // Interleave 16-bit elements into rg pairs, per 128-bit lane.
- simdscalari tmp0 = _simd_unpacklo_epi16(src0, src1); // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
- simdscalari tmp1 = _simd_unpackhi_epi16(src0, src1); // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
- // Recombine 128-bit halves so the pairs come out in ascending order.
- simdscalari dst0 = _simd_permute2f128_si(tmp0, tmp1, 0x20); // (2, 0) // rg0 rg1 rg2 rg3 rg4 rg5 rg6 rg7
- simdscalari dst1 = _simd_permute2f128_si(tmp0, tmp1, 0x31); // (3, 1) // rg8 rg9 rgA rgB rgC rgD rgE rgF
- // Store the two result vectors contiguously.
- _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 0, dst0); // rgrgrgrgrgrgrgrg
- _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 1, dst1); // rgrgrgrgrgrgrgrg
-
- // clang-format on
-#else
-#error Unsupported vector width
-#endif
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose24_8
-//////////////////////////////////////////////////////////////////////////
-struct Transpose24_8
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief SOA to AOS conversion for packed 24_8 data; both entry points are deleted, so any use fails to compile.
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
- static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst) = delete;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose32_8_24
-//////////////////////////////////////////////////////////////////////////
-struct Transpose32_8_24
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief SOA to AOS conversion for packed 32_8_24 data; both entry points are deleted, so any use fails to compile.
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
- static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst) = delete;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose4_4_4_4
-//////////////////////////////////////////////////////////////////////////
-struct Transpose4_4_4_4
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 4_4_4_4 data.
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
- static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst) = delete;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose5_6_5
-//////////////////////////////////////////////////////////////////////////
-struct Transpose5_6_5
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 5_6_5 data.
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
- static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst) = delete;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose9_9_9_5
-//////////////////////////////////////////////////////////////////////////
-struct Transpose9_9_9_5
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 9_9_9_5 data.
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
- static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst) = delete;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose5_5_5_1
-//////////////////////////////////////////////////////////////////////////
-struct Transpose5_5_5_1
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 5_5_5_1 data.
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
- static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst) = delete;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose1_5_5_5
-//////////////////////////////////////////////////////////////////////////
-struct Transpose1_5_5_5
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 5_5_5_1 data.
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
- static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst) = delete;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose10_10_10_2
-//////////////////////////////////////////////////////////////////////////
-struct Transpose10_10_10_2
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 10_10_10_2 data.
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
- static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst) = delete;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose11_11_10
-//////////////////////////////////////////////////////////////////////////
-struct Transpose11_11_10
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion for packed 11_11_10 data.
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
- static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst) = delete;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose64
-//////////////////////////////////////////////////////////////////////////
-struct Transpose64
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
- static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst) = delete;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose64_64
-//////////////////////////////////////////////////////////////////////////
-struct Transpose64_64
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
- static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst) = delete;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose64_64_64
-//////////////////////////////////////////////////////////////////////////
-struct Transpose64_64_64
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
- static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst) = delete;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Transpose64_64_64_64
-//////////////////////////////////////////////////////////////////////////
-struct Transpose64_64_64_64
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Performs an SOA to AOS conversion
- /// @param pSrc - source data in SOA form
- /// @param pDst - output data in AOS form
- static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
- static void Transpose_simd16(const uint8_t* pSrc, uint8_t* pDst) = delete;
-};
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
deleted file mode 100644
index 50ea12e0510..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ /dev/null
@@ -1,2385 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file frontend.cpp
- *
- * @brief Implementation for Frontend which handles vertex processing,
- * primitive assembly, clipping, binning, etc.
- *
- ******************************************************************************/
-
-#include "api.h"
-#include "frontend.h"
-#include "backend.h"
-#include "context.h"
-#include "rdtsc_core.h"
-#include "utils.h"
-#include "threads.h"
-#include "pa.h"
-#include "clip.h"
-#include "tilemgr.h"
-#include "tessellator.h"
-#include <limits>
-#include <iostream>
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief FE handler for SwrSync.
-/// @param pContext - pointer to SWR context.
-/// @param pDC - pointer to draw context.
-/// @param workerId - thread's worker id. Even thread has a unique id.
-/// @param pUserData - Pointer to user data passed back to sync callback.
-/// @todo This should go away when we switch this to use compute threading.
-void ProcessSync(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
-{
- BE_WORK work;
- work.type = SYNC;
- work.pfnWork = ProcessSyncBE;
-
- MacroTileMgr* pTileMgr = pDC->pTileMgr;
- pTileMgr->enqueue(0, 0, &work);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief FE handler for SwrDestroyContext.
-/// @param pContext - pointer to SWR context.
-/// @param pDC - pointer to draw context.
-/// @param workerId - thread's worker id. Even thread has a unique id.
-/// @param pUserData - Pointer to user data passed back to sync callback.
-void ProcessShutdown(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
-{
- BE_WORK work;
- work.type = SHUTDOWN;
- work.pfnWork = ProcessShutdownBE;
-
- MacroTileMgr* pTileMgr = pDC->pTileMgr;
- // Enqueue at least 1 work item for each worker thread
- // account for number of numa nodes
- uint32_t numNumaNodes = pContext->threadPool.numaMask + 1;
-
- for (uint32_t i = 0; i < pContext->threadPool.numThreads; ++i)
- {
- for (uint32_t n = 0; n < numNumaNodes; ++n)
- {
- pTileMgr->enqueue(i, n, &work);
- }
- }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief FE handler for SwrClearRenderTarget.
-/// @param pContext - pointer to SWR context.
-/// @param pDC - pointer to draw context.
-/// @param workerId - thread's worker id. Even thread has a unique id.
-/// @param pUserData - Pointer to user data passed back to clear callback.
-/// @todo This should go away when we switch this to use compute threading.
-void ProcessClear(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
-{
- CLEAR_DESC* pDesc = (CLEAR_DESC*)pUserData;
- MacroTileMgr* pTileMgr = pDC->pTileMgr;
-
- // queue a clear to each macro tile
- // compute macro tile bounds for the specified rect
- uint32_t macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM;
- uint32_t macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM;
- uint32_t macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM;
- uint32_t macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM;
-
- BE_WORK work;
- work.type = CLEAR;
- work.pfnWork = ProcessClearBE;
- work.desc.clear = *pDesc;
-
- for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
- {
- for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x)
- {
- pTileMgr->enqueue(x, y, &work);
- }
- }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief FE handler for SwrStoreTiles.
-/// @param pContext - pointer to SWR context.
-/// @param pDC - pointer to draw context.
-/// @param workerId - thread's worker id. Even thread has a unique id.
-/// @param pUserData - Pointer to user data passed back to callback.
-/// @todo This should go away when we switch this to use compute threading.
-void ProcessStoreTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
-{
- RDTSC_BEGIN(pContext->pBucketMgr, FEProcessStoreTiles, pDC->drawId);
- MacroTileMgr* pTileMgr = pDC->pTileMgr;
- STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData;
-
- // queue a store to each macro tile
- // compute macro tile bounds for the specified rect
- uint32_t macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM;
- uint32_t macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM;
- uint32_t macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM;
- uint32_t macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM;
-
- // store tiles
- BE_WORK work;
- work.type = STORETILES;
- work.pfnWork = ProcessStoreTilesBE;
- work.desc.storeTiles = *pDesc;
-
- for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
- {
- for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x)
- {
- pTileMgr->enqueue(x, y, &work);
- }
- }
-
- RDTSC_END(pContext->pBucketMgr, FEProcessStoreTiles, 0);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief FE handler for SwrInvalidateTiles.
-/// @param pContext - pointer to SWR context.
-/// @param pDC - pointer to draw context.
-/// @param workerId - thread's worker id. Even thread has a unique id.
-/// @param pUserData - Pointer to user data passed back to callback.
-/// @todo This should go away when we switch this to use compute threading.
-void ProcessDiscardInvalidateTiles(SWR_CONTEXT* pContext,
- DRAW_CONTEXT* pDC,
- uint32_t workerId,
- void* pUserData)
-{
- RDTSC_BEGIN(pContext->pBucketMgr, FEProcessInvalidateTiles, pDC->drawId);
- DISCARD_INVALIDATE_TILES_DESC* pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
- MacroTileMgr* pTileMgr = pDC->pTileMgr;
-
- // compute macro tile bounds for the specified rect
- uint32_t macroTileXMin = (pDesc->rect.xmin + KNOB_MACROTILE_X_DIM - 1) / KNOB_MACROTILE_X_DIM;
- uint32_t macroTileXMax = (pDesc->rect.xmax / KNOB_MACROTILE_X_DIM) - 1;
- uint32_t macroTileYMin = (pDesc->rect.ymin + KNOB_MACROTILE_Y_DIM - 1) / KNOB_MACROTILE_Y_DIM;
- uint32_t macroTileYMax = (pDesc->rect.ymax / KNOB_MACROTILE_Y_DIM) - 1;
-
- if (pDesc->fullTilesOnly == false)
- {
- // include partial tiles
- macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM;
- macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM;
- macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM;
- macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM;
- }
-
- SWR_ASSERT(macroTileXMax <= KNOB_NUM_HOT_TILES_X);
- SWR_ASSERT(macroTileYMax <= KNOB_NUM_HOT_TILES_Y);
-
- macroTileXMax = std::min<int32_t>(macroTileXMax, KNOB_NUM_HOT_TILES_X);
- macroTileYMax = std::min<int32_t>(macroTileYMax, KNOB_NUM_HOT_TILES_Y);
-
- // load tiles
- BE_WORK work;
- work.type = DISCARDINVALIDATETILES;
- work.pfnWork = ProcessDiscardInvalidateTilesBE;
- work.desc.discardInvalidateTiles = *pDesc;
-
- for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x)
- {
- for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
- {
- pTileMgr->enqueue(x, y, &work);
- }
- }
-
- RDTSC_END(pContext->pBucketMgr, FEProcessInvalidateTiles, 0);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes the number of primitives given the number of verts.
-/// @param mode - primitive topology for draw operation.
-/// @param numPrims - number of vertices or indices for draw.
-/// @todo Frontend needs to be refactored. This will go in appropriate place then.
-uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numPrims)
-{
- switch (mode)
- {
- case TOP_POINT_LIST:
- return numPrims;
- case TOP_TRIANGLE_LIST:
- return numPrims / 3;
- case TOP_TRIANGLE_STRIP:
- return numPrims < 3 ? 0 : numPrims - 2;
- case TOP_TRIANGLE_FAN:
- return numPrims < 3 ? 0 : numPrims - 2;
- case TOP_TRIANGLE_DISC:
- return numPrims < 2 ? 0 : numPrims - 1;
- case TOP_QUAD_LIST:
- return numPrims / 4;
- case TOP_QUAD_STRIP:
- return numPrims < 4 ? 0 : (numPrims - 2) / 2;
- case TOP_LINE_STRIP:
- return numPrims < 2 ? 0 : numPrims - 1;
- case TOP_LINE_LIST:
- return numPrims / 2;
- case TOP_LINE_LOOP:
- return numPrims;
- case TOP_RECT_LIST:
- return numPrims / 3;
- case TOP_LINE_LIST_ADJ:
- return numPrims / 4;
- case TOP_LISTSTRIP_ADJ:
- return numPrims < 3 ? 0 : numPrims - 3;
- case TOP_TRI_LIST_ADJ:
- return numPrims / 6;
- case TOP_TRI_STRIP_ADJ:
- return numPrims < 4 ? 0 : (numPrims / 2) - 2;
-
- case TOP_PATCHLIST_1:
- case TOP_PATCHLIST_2:
- case TOP_PATCHLIST_3:
- case TOP_PATCHLIST_4:
- case TOP_PATCHLIST_5:
- case TOP_PATCHLIST_6:
- case TOP_PATCHLIST_7:
- case TOP_PATCHLIST_8:
- case TOP_PATCHLIST_9:
- case TOP_PATCHLIST_10:
- case TOP_PATCHLIST_11:
- case TOP_PATCHLIST_12:
- case TOP_PATCHLIST_13:
- case TOP_PATCHLIST_14:
- case TOP_PATCHLIST_15:
- case TOP_PATCHLIST_16:
- case TOP_PATCHLIST_17:
- case TOP_PATCHLIST_18:
- case TOP_PATCHLIST_19:
- case TOP_PATCHLIST_20:
- case TOP_PATCHLIST_21:
- case TOP_PATCHLIST_22:
- case TOP_PATCHLIST_23:
- case TOP_PATCHLIST_24:
- case TOP_PATCHLIST_25:
- case TOP_PATCHLIST_26:
- case TOP_PATCHLIST_27:
- case TOP_PATCHLIST_28:
- case TOP_PATCHLIST_29:
- case TOP_PATCHLIST_30:
- case TOP_PATCHLIST_31:
- case TOP_PATCHLIST_32:
- return numPrims / (mode - TOP_PATCHLIST_BASE);
-
- case TOP_POLYGON:
- case TOP_POINT_LIST_BF:
- case TOP_LINE_STRIP_CONT:
- case TOP_LINE_STRIP_BF:
- case TOP_LINE_STRIP_CONT_BF:
- case TOP_TRIANGLE_FAN_NOSTIPPLE:
- case TOP_TRI_STRIP_REVERSE:
- case TOP_PATCHLIST_BASE:
- case TOP_UNKNOWN:
- SWR_INVALID("Unsupported topology: %d", mode);
- return 0;
- }
-
- return 0;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes the number of verts given the number of primitives.
-/// @param mode - primitive topology for draw operation.
-/// @param numPrims - number of primitives for draw.
-uint32_t GetNumVerts(PRIMITIVE_TOPOLOGY mode, uint32_t numPrims)
-{
- switch (mode)
- {
- case TOP_POINT_LIST:
- return numPrims;
- case TOP_TRIANGLE_LIST:
- return numPrims * 3;
- case TOP_TRIANGLE_STRIP:
- return numPrims ? numPrims + 2 : 0;
- case TOP_TRIANGLE_FAN:
- return numPrims ? numPrims + 2 : 0;
- case TOP_TRIANGLE_DISC:
- return numPrims ? numPrims + 1 : 0;
- case TOP_QUAD_LIST:
- return numPrims * 4;
- case TOP_QUAD_STRIP:
- return numPrims ? numPrims * 2 + 2 : 0;
- case TOP_LINE_STRIP:
- return numPrims ? numPrims + 1 : 0;
- case TOP_LINE_LIST:
- return numPrims * 2;
- case TOP_LINE_LOOP:
- return numPrims;
- case TOP_RECT_LIST:
- return numPrims * 3;
- case TOP_LINE_LIST_ADJ:
- return numPrims * 4;
- case TOP_LISTSTRIP_ADJ:
- return numPrims ? numPrims + 3 : 0;
- case TOP_TRI_LIST_ADJ:
- return numPrims * 6;
- case TOP_TRI_STRIP_ADJ:
- return numPrims ? (numPrims + 2) * 2 : 0;
-
- case TOP_PATCHLIST_1:
- case TOP_PATCHLIST_2:
- case TOP_PATCHLIST_3:
- case TOP_PATCHLIST_4:
- case TOP_PATCHLIST_5:
- case TOP_PATCHLIST_6:
- case TOP_PATCHLIST_7:
- case TOP_PATCHLIST_8:
- case TOP_PATCHLIST_9:
- case TOP_PATCHLIST_10:
- case TOP_PATCHLIST_11:
- case TOP_PATCHLIST_12:
- case TOP_PATCHLIST_13:
- case TOP_PATCHLIST_14:
- case TOP_PATCHLIST_15:
- case TOP_PATCHLIST_16:
- case TOP_PATCHLIST_17:
- case TOP_PATCHLIST_18:
- case TOP_PATCHLIST_19:
- case TOP_PATCHLIST_20:
- case TOP_PATCHLIST_21:
- case TOP_PATCHLIST_22:
- case TOP_PATCHLIST_23:
- case TOP_PATCHLIST_24:
- case TOP_PATCHLIST_25:
- case TOP_PATCHLIST_26:
- case TOP_PATCHLIST_27:
- case TOP_PATCHLIST_28:
- case TOP_PATCHLIST_29:
- case TOP_PATCHLIST_30:
- case TOP_PATCHLIST_31:
- case TOP_PATCHLIST_32:
- return numPrims * (mode - TOP_PATCHLIST_BASE);
-
- case TOP_POLYGON:
- case TOP_POINT_LIST_BF:
- case TOP_LINE_STRIP_CONT:
- case TOP_LINE_STRIP_BF:
- case TOP_LINE_STRIP_CONT_BF:
- case TOP_TRIANGLE_FAN_NOSTIPPLE:
- case TOP_TRI_STRIP_REVERSE:
- case TOP_PATCHLIST_BASE:
- case TOP_UNKNOWN:
- SWR_INVALID("Unsupported topology: %d", mode);
- return 0;
- }
-
- return 0;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Return number of verts per primitive.
-/// @param topology - topology
-/// @param includeAdjVerts - include adjacent verts in primitive vertices
-uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts)
-{
- uint32_t numVerts = 0;
- switch (topology)
- {
- case TOP_POINT_LIST:
- case TOP_POINT_LIST_BF:
- numVerts = 1;
- break;
- case TOP_LINE_LIST:
- case TOP_LINE_STRIP:
- case TOP_LINE_LIST_ADJ:
- case TOP_LINE_LOOP:
- case TOP_LINE_STRIP_CONT:
- case TOP_LINE_STRIP_BF:
- case TOP_LISTSTRIP_ADJ:
- numVerts = 2;
- break;
- case TOP_TRIANGLE_LIST:
- case TOP_TRIANGLE_STRIP:
- case TOP_TRIANGLE_FAN:
- case TOP_TRI_LIST_ADJ:
- case TOP_TRI_STRIP_ADJ:
- case TOP_TRI_STRIP_REVERSE:
- case TOP_RECT_LIST:
- numVerts = 3;
- break;
- case TOP_QUAD_LIST:
- case TOP_QUAD_STRIP:
- numVerts = 4;
- break;
- case TOP_PATCHLIST_1:
- case TOP_PATCHLIST_2:
- case TOP_PATCHLIST_3:
- case TOP_PATCHLIST_4:
- case TOP_PATCHLIST_5:
- case TOP_PATCHLIST_6:
- case TOP_PATCHLIST_7:
- case TOP_PATCHLIST_8:
- case TOP_PATCHLIST_9:
- case TOP_PATCHLIST_10:
- case TOP_PATCHLIST_11:
- case TOP_PATCHLIST_12:
- case TOP_PATCHLIST_13:
- case TOP_PATCHLIST_14:
- case TOP_PATCHLIST_15:
- case TOP_PATCHLIST_16:
- case TOP_PATCHLIST_17:
- case TOP_PATCHLIST_18:
- case TOP_PATCHLIST_19:
- case TOP_PATCHLIST_20:
- case TOP_PATCHLIST_21:
- case TOP_PATCHLIST_22:
- case TOP_PATCHLIST_23:
- case TOP_PATCHLIST_24:
- case TOP_PATCHLIST_25:
- case TOP_PATCHLIST_26:
- case TOP_PATCHLIST_27:
- case TOP_PATCHLIST_28:
- case TOP_PATCHLIST_29:
- case TOP_PATCHLIST_30:
- case TOP_PATCHLIST_31:
- case TOP_PATCHLIST_32:
- numVerts = topology - TOP_PATCHLIST_BASE;
- break;
- default:
- SWR_INVALID("Unsupported topology: %d", topology);
- break;
- }
-
- if (includeAdjVerts)
- {
- switch (topology)
- {
- case TOP_LISTSTRIP_ADJ:
- case TOP_LINE_LIST_ADJ:
- numVerts = 4;
- break;
- case TOP_TRI_STRIP_ADJ:
- case TOP_TRI_LIST_ADJ:
- numVerts = 6;
- break;
- default:
- break;
- }
- }
-
- return numVerts;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Generate mask from remaining work.
-/// @param numWorkItems - Number of items being worked on by a SIMD.
-static INLINE simdscalari GenerateMask(uint32_t numItemsRemaining)
-{
- uint32_t numActive =
- (numItemsRemaining >= KNOB_SIMD_WIDTH) ? KNOB_SIMD_WIDTH : numItemsRemaining;
- uint32_t mask = (numActive > 0) ? ((1 << numActive) - 1) : 0;
- return _simd_castps_si(_simd_vmask_ps(mask));
-}
-
-static INLINE simd16scalari GenerateMask16(uint32_t numItemsRemaining)
-{
- uint32_t numActive =
- (numItemsRemaining >= KNOB_SIMD16_WIDTH) ? KNOB_SIMD16_WIDTH : numItemsRemaining;
- uint32_t mask = (numActive > 0) ? ((1 << numActive) - 1) : 0;
- return _simd16_castps_si(_simd16_vmask_ps(mask));
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief StreamOut - Streams vertex data out to SO buffers.
-/// Generally, we are only streaming out a SIMDs worth of triangles.
-/// @param pDC - pointer to draw context.
-/// @param workerId - thread's worker id. Even thread has a unique id.
-/// @param numPrims - Number of prims to streamout (e.g. points, lines, tris)
-static void StreamOut(
- DRAW_CONTEXT* pDC, PA_STATE& pa, uint32_t workerId, uint32_t* pPrimData, uint32_t streamIndex)
-{
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEStreamout, pDC->drawId);
-
- void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
-
- const API_STATE& state = GetApiState(pDC);
- const SWR_STREAMOUT_STATE& soState = state.soState;
-
- uint32_t soVertsPerPrim = NumVertsPerPrim(pa.binTopology, false);
-
- // The pPrimData buffer is sparse in that we allocate memory for all 32 attributes for each
- // vertex.
- uint32_t primDataDwordVertexStride = (SWR_VTX_NUM_SLOTS * sizeof(float) * 4) / sizeof(uint32_t);
-
- SWR_STREAMOUT_CONTEXT soContext = {0};
-
- // Setup buffer state pointers.
- for (uint32_t i = 0; i < 4; ++i)
- {
- soContext.pBuffer[i] = &state.soBuffer[i];
- }
-
- uint32_t numPrims = pa.NumPrims();
-
- for (uint32_t primIndex = 0; primIndex < numPrims; ++primIndex)
- {
- unsigned long slot = 0;
- uint64_t soMask = soState.streamMasks[streamIndex];
-
- // Write all entries into primitive data buffer for SOS.
- while (_BitScanForward64(&slot, soMask))
- {
- simd4scalar attrib[MAX_NUM_VERTS_PER_PRIM]; // prim attribs (always 4 wide)
- uint32_t paSlot = slot + soState.vertexAttribOffset[streamIndex];
- pa.AssembleSingle(paSlot, primIndex, attrib);
-
- // Attribute offset is relative offset from start of vertex.
- // Note that attributes start at slot 1 in the PA buffer. We need to write this
- // to prim data starting at slot 0. Which is why we do (slot - 1).
- // Also note: GL works slightly differently, and needs slot 0
- uint32_t primDataAttribOffset = slot * sizeof(float) * 4 / sizeof(uint32_t);
-
- // Store each vertex's attrib at appropriate locations in pPrimData buffer.
- for (uint32_t v = 0; v < soVertsPerPrim; ++v)
- {
- uint32_t* pPrimDataAttrib =
- pPrimData + primDataAttribOffset + (v * primDataDwordVertexStride);
-
- _mm_store_ps((float*)pPrimDataAttrib, attrib[v]);
- }
-
- soMask &= ~(uint64_t(1) << slot);
- }
-
- // Update pPrimData pointer
- soContext.pPrimData = pPrimData;
-
- // Call SOS
- SWR_ASSERT(state.pfnSoFunc[streamIndex] != nullptr,
- "Trying to execute uninitialized streamout jit function.");
- state.pfnSoFunc[streamIndex](GetPrivateState(pDC), pWorkerData, soContext);
- }
-
- // Update SO write offset. The driver provides memory for the update.
- for (uint32_t i = 0; i < 4; ++i)
- {
- if (state.soBuffer[i].pWriteOffset)
- {
- bool nullTileAccessed = false;
- void* pWriteOffset = pDC->pContext->pfnTranslateGfxptrForWrite(
- GetPrivateState(pDC), soContext.pBuffer[i]->pWriteOffset, &nullTileAccessed, pWorkerData);
- *((uint32_t*)pWriteOffset) = soContext.pBuffer[i]->streamOffset * sizeof(uint32_t);
- }
-
- if (state.soBuffer[i].soWriteEnable)
- {
- pDC->dynState.SoWriteOffset[i] = soContext.pBuffer[i]->streamOffset * sizeof(uint32_t);
- pDC->dynState.SoWriteOffsetDirty[i] = true;
- }
- }
-
- pDC->dynState.soPrims += soContext.numPrimsWritten;
-
- UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded);
- UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten);
-
- RDTSC_END(pDC->pContext->pBucketMgr, FEStreamout, 1);
-}
-
-#if USE_SIMD16_FRONTEND
-//////////////////////////////////////////////////////////////////////////
-/// Is value an even number (a multiple of two)
-///
-template <typename T>
-INLINE static bool IsEven(T value)
-{
- return (value & 1) == 0;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// Round up value to an even number (a multiple of two)
-///
-template <typename T>
-INLINE static T RoundUpEven(T value)
-{
- return (value + 1) & ~1;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// Round down value to an even number (a multiple of two)
-///
-template <typename T>
-INLINE static T RoundDownEven(T value)
-{
- return value & ~1;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// Pack pairs of simdvertexes into simd16vertexes, assume non-overlapping
-///
-/// vertexCount is in terms of the source simdvertexes and must be even
-///
-/// attribCount will limit the vector copies to those attribs specified
-///
-/// note: the stride between vertexes is determinded by SWR_VTX_NUM_SLOTS
-///
-void PackPairsOfSimdVertexIntoSimd16Vertex(simd16vertex* vertex_simd16,
- const simdvertex* vertex,
- uint32_t vertexCount,
- uint32_t attribCount)
-{
- SWR_ASSERT(vertex);
- SWR_ASSERT(vertex_simd16);
- SWR_ASSERT(attribCount <= SWR_VTX_NUM_SLOTS);
-
- simd16vertex temp;
-
- for (uint32_t i = 0; i < vertexCount; i += 2)
- {
- for (uint32_t j = 0; j < attribCount; j += 1)
- {
- for (uint32_t k = 0; k < 4; k += 1)
- {
- temp.attrib[j][k] =
- _simd16_insert_ps(_simd16_setzero_ps(), vertex[i].attrib[j][k], 0);
-
- if ((i + 1) < vertexCount)
- {
- temp.attrib[j][k] =
- _simd16_insert_ps(temp.attrib[j][k], vertex[i + 1].attrib[j][k], 1);
- }
- }
- }
-
- for (uint32_t j = 0; j < attribCount; j += 1)
- {
- vertex_simd16[i >> 1].attrib[j] = temp.attrib[j];
- }
- }
-}
-
-#endif
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes number of invocations. The current index represents
-/// the start of the SIMD. The max index represents how much work
-/// items are remaining. If there is less then a SIMD's xmin of work
-/// then return the remaining amount of work.
-/// @param curIndex - The start index for the SIMD.
-/// @param maxIndex - The last index for all work items.
-static INLINE uint32_t GetNumInvocations(uint32_t curIndex, uint32_t maxIndex)
-{
- uint32_t remainder = (maxIndex - curIndex);
-#if USE_SIMD16_FRONTEND
- return (remainder >= KNOB_SIMD16_WIDTH) ? KNOB_SIMD16_WIDTH : remainder;
-#else
- return (remainder >= KNOB_SIMD_WIDTH) ? KNOB_SIMD_WIDTH : remainder;
-#endif
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Converts a streamId buffer to a cut buffer for the given stream id.
-/// The geometry shader will loop over each active streamout buffer, assembling
-/// primitives for the downstream stages. When multistream output is enabled,
-/// the generated stream ID buffer from the GS needs to be converted to a cut
-/// buffer for the primitive assembler.
-/// @param stream - stream id to generate the cut buffer for
-/// @param pStreamIdBase - pointer to the stream ID buffer
-/// @param numEmittedVerts - Number of total verts emitted by the GS
-/// @param pCutBuffer - output buffer to write cuts to
-void ProcessStreamIdBuffer(uint32_t stream,
- uint8_t* pStreamIdBase,
- uint32_t numEmittedVerts,
- uint8_t* pCutBuffer)
-{
- SWR_ASSERT(stream < MAX_SO_STREAMS);
-
- uint32_t numOutputBytes = AlignUp(numEmittedVerts, 8) / 8;
-
- for (uint32_t b = 0; b < numOutputBytes; ++b)
- {
- uint8_t curInputByte = pStreamIdBase[2 * b];
- uint8_t outByte = 0;
- for (uint32_t i = 0; i < 4; ++i)
- {
- if ((curInputByte & 0x3) != stream)
- {
- outByte |= (1 << i);
- }
- curInputByte >>= 2;
- }
-
- curInputByte = pStreamIdBase[2 * b + 1];
- for (uint32_t i = 0; i < 4; ++i)
- {
- if ((curInputByte & 0x3) != stream)
- {
- outByte |= (1 << (i + 4));
- }
- curInputByte >>= 2;
- }
-
- *pCutBuffer++ = outByte;
- }
-}
-
-// Buffers that are allocated if GS is enabled
-struct GsBuffers
-{
- uint8_t* pGsIn;
- uint8_t* pGsOut[KNOB_SIMD_WIDTH];
- uint8_t* pGsTransposed;
- void* pStreamCutBuffer;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Transposes GS output from SOA to AOS to feed the primitive assembler.
-///        Vertices are processed in batches of SimdWidth. For each attribute of a
-///        batch, the x, y, z and w components are gathered (one lane per vertex)
-///        and stored as four consecutive SIMD registers in the destination.
-/// @param pDst - Destination buffer for the current SIMD width, fed into the primitive
-///               assembler
-/// @param pSrc - Buffer of vertices written by the geometry shader; consecutive
-///               vertices are numAttribs * 4 floats apart
-/// @param numVerts - Number of vertices outputted by the GS
-/// @param numAttribs - Number of attributes per vertex
-template <typename SIMD_T, uint32_t SimdWidth>
-void TransposeSOAtoAOS(uint8_t* pDst, uint8_t* pSrc, uint32_t numVerts, uint32_t numAttribs)
-{
-    // Bytes per source vertex, and bytes per destination batch of SimdWidth vertices
-    uint32_t bytesPerSrcVert  = numAttribs * sizeof(float) * 4;
-    uint32_t bytesPerDstBatch = numAttribs * sizeof(Float<SIMD_T>) * 4;
-
-    // Lane i of every gather reads from vertex i of the current batch
-    OSALIGNSIMD16(uint32_t) laneOffsets[SimdWidth];
-    for (uint32_t lane = 0; lane < SimdWidth; ++lane)
-    {
-        laneOffsets[lane] = bytesPerSrcVert * lane;
-    }
-    auto vLaneOffsets = SIMD_T::load_si((Integer<SIMD_T>*)&laneOffsets[0]);
-
-    uint32_t batchCount = AlignUp(numVerts, SimdWidth) / SimdWidth;
-
-    for (uint32_t batch = 0; batch < batchCount; ++batch)
-    {
-        uint8_t* pSrcAttrib = pSrc + batch * bytesPerSrcVert * SimdWidth;
-        uint8_t* pDstAttrib = pDst + batch * bytesPerDstBatch;
-
-        // Disable lanes past the last vertex so the gathers cannot overrun the source
-        uint32_t vertsLeft = numVerts - batch * SimdWidth;
-        uint32_t laneBits  = GenMask(std::min(vertsLeft, SimdWidth));
-        auto     vMask     = SIMD_T::vmask_ps(laneBits);
-        auto     viMask    = SIMD_T::castps_si(vMask);
-
-        for (uint32_t attrib = 0; attrib < numAttribs; ++attrib)
-        {
-            // Component pointers for this attribute in the first vertex of the batch
-            const float* pSrcX = (const float*)pSrcAttrib;
-            const float* pSrcY = (const float*)(pSrcAttrib + sizeof(float));
-            const float* pSrcZ = (const float*)(pSrcAttrib + sizeof(float) * 2);
-            const float* pSrcW = (const float*)(pSrcAttrib + sizeof(float) * 3);
-
-            auto vX = SIMD_T::mask_i32gather_ps(SIMD_T::setzero_ps(), pSrcX, vLaneOffsets, vMask);
-            auto vY = SIMD_T::mask_i32gather_ps(SIMD_T::setzero_ps(), pSrcY, vLaneOffsets, vMask);
-            auto vZ = SIMD_T::mask_i32gather_ps(SIMD_T::setzero_ps(), pSrcZ, vLaneOffsets, vMask);
-            auto vW = SIMD_T::mask_i32gather_ps(SIMD_T::setzero_ps(), pSrcW, vLaneOffsets, vMask);
-
-            // One SIMD register per component, written back to back
-            float* pDstX = (float*)pDstAttrib;
-            float* pDstY = (float*)(pDstAttrib + sizeof(Float<SIMD_T>));
-            float* pDstZ = (float*)(pDstAttrib + sizeof(Float<SIMD_T>) * 2);
-            float* pDstW = (float*)(pDstAttrib + sizeof(Float<SIMD_T>) * 3);
-
-            SIMD_T::maskstore_ps(pDstX, viMask, vX);
-            SIMD_T::maskstore_ps(pDstY, viMask, vY);
-            SIMD_T::maskstore_ps(pDstZ, viMask, vZ);
-            SIMD_T::maskstore_ps(pDstW, viMask, vW);
-
-            // Step to the next attribute in both buffers
-            pSrcAttrib += sizeof(float) * 4;
-            pDstAttrib += sizeof(Float<SIMD_T>) * 4;
-        }
-    }
-}
-
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Implements GS stage: assembles the input prims' attributes into the GS input buffer, runs the GS once per instance, then walks the emitted verts per input prim / instance / stream and forwards the assembled prims to stream-out and/or the clipper.
-/// @param pDC - pointer to draw context.
-/// @param workerId - thread's worker id. Each thread has a unique id.
-/// @param pa - The primitive assembly object.
-/// @param pGsBuffers - GS scratch buffers (GS input, per-prim GS output, transposed output, stream cut buffer).
-template <typename HasStreamOutT, typename HasRastT>
-static void GeometryShaderStage(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- PA_STATE& pa,
- GsBuffers* pGsBuffers,
- uint32_t* pSoPrimData, // passed through to StreamOut()
-#if USE_SIMD16_FRONTEND
- uint32_t numPrims_simd8, // number of input prims handled by this call (simd16 frontend only)
-#endif
- simdscalari const& primID) // primitive IDs of the input prims, read per lane below
-{
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEGeometryShader, pDC->drawId);
-
- void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
-
- const API_STATE& state = GetApiState(pDC);
- const SWR_GS_STATE* pState = &state.gsState;
- SWR_GS_CONTEXT gsContext;
-
- static uint8_t sNullBuffer[128] = {0}; // all-zero stand-in cut buffer, used when controlDataSize == 0
-
- for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i)
- {
- gsContext.pStreams[i] = pGsBuffers->pGsOut[i];
- }
- gsContext.pVerts = (simdvector*)pGsBuffers->pGsIn;
- gsContext.PrimitiveID = primID;
-
- uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, true);
- simdvector attrib[MAX_NUM_VERTS_PER_PRIM];
-
- // assemble all attributes for the input primitive
- gsContext.inputVertStride = pState->inputVertStride;
- for (uint32_t slot = 0; slot < pState->numInputAttribs; ++slot)
- {
- uint32_t attribOffset = slot + pState->vertexAttribOffset;
- pa.Assemble(attribOffset, attrib);
-
- for (uint32_t i = 0; i < numVertsPerPrim; ++i)
- {
- gsContext.pVerts[attribOffset + pState->inputVertStride * i] = attrib[i]; // vertex i's attributes start at inputVertStride * i
- }
- }
-
- // record valid prims from the frontend to avoid over binning the newly generated
- // prims from the GS
-#if USE_SIMD16_FRONTEND
- uint32_t numInputPrims = numPrims_simd8;
-#else
- uint32_t numInputPrims = pa.NumPrims();
-#endif
-
- for (uint32_t instance = 0; instance < pState->instanceCount; ++instance)
- {
- gsContext.InstanceID = instance;
- gsContext.mask = GenerateMask(numInputPrims);
-
- // execute the geometry shader
- state.pfnGsFunc(GetPrivateState(pDC), pWorkerData, &gsContext);
- AR_EVENT(GSStats((HANDLE)&gsContext.stats));
-
- for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i)
- {
- gsContext.pStreams[i] += pState->allocationSize; // next instance writes allocationSize bytes further into each output buffer
- }
- }
-
- // set up new binner and state for the GS output topology
-#if USE_SIMD16_FRONTEND
- PFN_PROCESS_PRIMS_SIMD16 pfnClipFunc = nullptr;
- if (HasRastT::value)
- {
- switch (pState->outputTopology)
- {
- case TOP_RECT_LIST:
- pfnClipFunc = ClipRectangles_simd16;
- break;
- case TOP_TRIANGLE_STRIP:
- pfnClipFunc = ClipTriangles_simd16;
- break;
- case TOP_LINE_STRIP:
- pfnClipFunc = ClipLines_simd16;
- break;
- case TOP_POINT_LIST:
- pfnClipFunc = ClipPoints_simd16;
- break;
- default:
- SWR_INVALID("Unexpected GS output topology: %d", pState->outputTopology);
- }
- }
-
-#else
- PFN_PROCESS_PRIMS pfnClipFunc = nullptr;
- if (HasRastT::value)
- {
- switch (pState->outputTopology)
- {
- case TOP_RECT_LIST:
- pfnClipFunc = ClipRectangles;
- break;
- case TOP_TRIANGLE_STRIP:
- pfnClipFunc = ClipTriangles;
- break;
- case TOP_LINE_STRIP:
- pfnClipFunc = ClipLines;
- break;
- case TOP_POINT_LIST:
- pfnClipFunc = ClipPoints;
- break;
- default:
- SWR_INVALID("Unexpected GS output topology: %d", pState->outputTopology);
- }
- }
-
-#endif
- // foreach input prim:
- // - setup a new PA based on the emitted verts for that prim
- // - loop over the new verts, calling PA to assemble each prim
- uint32_t* pPrimitiveId = (uint32_t*)&primID; // scalar view of primID, indexed by inputPrim below
-
- uint32_t totalPrimsGenerated = 0;
- for (uint32_t inputPrim = 0; inputPrim < numInputPrims; ++inputPrim)
- {
- uint8_t* pInstanceBase = (uint8_t*)pGsBuffers->pGsOut[inputPrim];
-
- // Vertex count is either emitted by shader or static
- uint32_t vertexCount = 0;
- if (pState->staticVertexCount)
- {
- vertexCount = pState->staticVertexCount;
- }
- else
- {
- // If emitted in shader, it should be stored in the first dword of the output buffer
- vertexCount = *(uint32_t*)pInstanceBase; // NOTE(review): read once from the instance-0 region and reused for every instance - confirm this is intended
- }
-
- for (uint32_t instance = 0; instance < pState->instanceCount; ++instance)
- {
- uint32_t numEmittedVerts = vertexCount;
- if (numEmittedVerts == 0)
- {
- continue;
- }
-
- uint8_t* pBase = pInstanceBase + instance * pState->allocationSize; // this instance's region of the prim's output buffer
- uint8_t* pCutBase =
- pState->controlDataSize == 0 ? &sNullBuffer[0] : pBase + pState->controlDataOffset;
- uint8_t* pVertexBaseAOS = pBase + pState->outputVertexOffset;
-
-#if USE_SIMD16_FRONTEND
- TransposeSOAtoAOS<SIMD512, KNOB_SIMD16_WIDTH>((uint8_t*)pGsBuffers->pGsTransposed,
- pVertexBaseAOS,
- vertexCount,
- pState->outputVertexSize);
-#else
- TransposeSOAtoAOS<SIMD256, KNOB_SIMD_WIDTH>((uint8_t*)pGsBuffers->pGsTransposed,
- pVertexBaseAOS,
- vertexCount,
- pState->outputVertexSize);
-#endif
-
- uint32_t numAttribs = state.feNumAttributes;
-
- for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream)
- {
- bool processCutVerts = false;
- uint8_t* pCutBuffer = pCutBase;
-
- // assign default stream ID, only relevant when GS is outputting a single stream
- uint32_t streamID = 0;
- if (pState->isSingleStream)
- {
- processCutVerts = true;
- streamID = pState->singleStreamID;
- if (streamID != stream) // single-stream GS: only the configured stream is processed
- continue;
- }
- else
- {
- // early exit if this stream is not enabled for streamout
- if (HasStreamOutT::value && !state.soState.streamEnable[stream])
- {
- continue;
- }
-
- // multi-stream output, need to translate StreamID buffer to a cut buffer
- ProcessStreamIdBuffer(
- stream, pCutBase, numEmittedVerts, (uint8_t*)pGsBuffers->pStreamCutBuffer);
- pCutBuffer = (uint8_t*)pGsBuffers->pStreamCutBuffer;
- processCutVerts = false;
- }
-
-#if USE_SIMD16_FRONTEND
- PA_STATE_CUT gsPa(pDC,
- (uint8_t*)pGsBuffers->pGsTransposed,
- numEmittedVerts,
- pState->outputVertexSize,
- reinterpret_cast<simd16mask*>(pCutBuffer),
- numEmittedVerts,
- numAttribs,
- pState->outputTopology,
- processCutVerts,
- pa.numVertsPerPrim);
-
-#else
- PA_STATE_CUT gsPa(pDC,
- (uint8_t*)pGsBuffers->pGsTransposed,
- numEmittedVerts,
- pState->outputVertexSize,
- pCutBuffer,
- numEmittedVerts,
- numAttribs,
- pState->outputTopology,
- processCutVerts,
- pa.numVertsPerPrim);
-
-#endif
- while (gsPa.GetNextStreamOutput())
- {
- do
- {
-#if USE_SIMD16_FRONTEND
- simd16vector attrib_simd16[3];
-
- bool assemble = gsPa.Assemble(VERTEX_POSITION_SLOT, attrib_simd16);
-
-#else
- bool assemble = gsPa.Assemble(VERTEX_POSITION_SLOT, attrib);
-
-#endif
- if (assemble)
- {
- totalPrimsGenerated += gsPa.NumPrims();
-
- if (HasStreamOutT::value)
- {
-#if ENABLE_AVX512_SIMD16
- gsPa.useAlternateOffset = false;
-#endif
- StreamOut(pDC, gsPa, workerId, pSoPrimData, stream);
- }
-
- if (HasRastT::value && state.soState.streamToRasterizer == stream) // only the stream routed to the rasterizer is clipped/binned
- {
-#if USE_SIMD16_FRONTEND
- simd16scalari vPrimId = _simd16_set1_epi32(pPrimitiveId[inputPrim]);
-
- // Gather data from the SGV if provided.
- simd16scalari vViewportIdx = SIMD16::setzero_si();
- simd16scalari vRtIdx = SIMD16::setzero_si();
- SIMD16::Vec4 svgAttrib[4]; // only read below when one of the read*ArrayIndex flags is set
-
- if (state.backendState.readViewportArrayIndex ||
- state.backendState.readRenderTargetArrayIndex)
- {
- gsPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
- }
-
- if (state.backendState.readViewportArrayIndex)
- {
- vViewportIdx =
- SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
- gsPa.viewportArrayActive = true;
- }
- if (state.backendState.readRenderTargetArrayIndex)
- {
- vRtIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
- gsPa.rtArrayActive = true;
- }
-
- {
- // OOB VPAI indices => forced to zero.
- vViewportIdx =
- SIMD16::max_epi32(vViewportIdx, SIMD16::setzero_si());
- simd16scalari vNumViewports =
- SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simd16scalari vClearMask =
- SIMD16::cmplt_epi32(vViewportIdx, vNumViewports);
- vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx);
-
- gsPa.useAlternateOffset = false;
- pfnClipFunc(pDC,
- gsPa,
- workerId,
- attrib_simd16,
- GenMask(gsPa.NumPrims()),
- vPrimId,
- vViewportIdx,
- vRtIdx);
- }
-#else
- simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);
-
- // Gather data from the SGV if provided.
- simdscalari vViewportIdx = SIMD::setzero_si();
- simdscalari vRtIdx = SIMD::setzero_si();
- SIMD::Vec4 svgAttrib[4]; // only read below when one of the read*ArrayIndex flags is set
-
- if (state.backendState.readViewportArrayIndex ||
- state.backendState.readRenderTargetArrayIndex)
- {
- gsPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
- }
-
- if (state.backendState.readViewportArrayIndex)
- {
- vViewportIdx =
- SIMD::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
-
- // OOB VPAI indices => forced to zero.
- vViewportIdx =
- SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
- simdscalari vNumViewports =
- SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simdscalari vClearMask =
- SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
- vViewportIdx = SIMD::and_si(vClearMask, vViewportIdx);
- gsPa.viewportArrayActive = true;
- }
- if (state.backendState.readRenderTargetArrayIndex)
- {
- vRtIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
- gsPa.rtArrayActive = true;
- }
-
- pfnClipFunc(pDC,
- gsPa,
- workerId,
- attrib,
- GenMask(gsPa.NumPrims()),
- vPrimId,
- vViewportIdx,
- vRtIdx);
-#endif
- }
- }
- } while (gsPa.NextPrim());
- }
- }
- }
- }
-
- // update GS pipeline stats
- UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount);
- UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated);
- AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, numVertsPerPrim * numInputPrims));
- RDTSC_END(pDC->pContext->pBucketMgr, FEGeometryShader, 1);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Allocate GS buffers from the draw context's arena.
-/// @param pDC - pointer to draw context; its arena provides all allocations.
-/// @param state - API state (the GS state is read from it)
-/// @param vertsPerPrim - number of vertices per input primitive, used to size the GS input buffer
-/// @param pGsBuffers - receives the pointers to the allocated buffers
-template <typename SIMD_T, uint32_t SIMD_WIDTH>
-static INLINE void AllocateGsBuffers(DRAW_CONTEXT* pDC,
-                                     const API_STATE& state,
-                                     uint32_t vertsPerPrim,
-                                     GsBuffers* pGsBuffers)
-{
-    auto arena = pDC->pArena;
-    SWR_ASSERT(arena != nullptr);
-
-    const SWR_GS_STATE& gs = state.gsState;
-    SWR_ASSERT(gs.gsEnable);
-
-    // GS input: one simdvector per attribute slot for every vertex of the input prim
-    uint32_t gsInBytes = gs.inputVertStride * sizeof(simdvector) * vertsPerPrim;
-    pGsBuffers->pGsIn  = (uint8_t*)arena->AllocAligned(gsInBytes, 32);
-
-    // GS output: every buffer has room for the output of all instances
-    const uint32_t gsOutBytes = gs.instanceCount * gs.allocationSize;
-    for (uint8_t*& pOut : pGsBuffers->pGsOut)
-    {
-        pOut = (uint8_t*)arena->AllocAligned(gsOutBytes, 32);
-    }
-
-    // Transposed GS output: maxNumVerts rounded up to whole SIMD batches
-    uint32_t batchCount        = AlignUp(gs.maxNumVerts, SIMD_WIDTH) / SIMD_WIDTH;
-    uint32_t transposedBytes   = batchCount * gs.outputVertexSize * sizeof(Vec4<SIMD_T>);
-    pGsBuffers->pGsTransposed  = (uint8_t*)arena->AllocAligned(transposedBytes, 32);
-
-    // Temporary stream->cut buffer is only needed for multi-stream output
-    if (!gs.isSingleStream)
-    {
-        pGsBuffers->pStreamCutBuffer =
-            (uint8_t*)arena->AllocAligned(AlignUp(gs.maxNumVerts * 2, 32), 32);
-    }
-    else
-    {
-        pGsBuffers->pStreamCutBuffer = nullptr;
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Contains all data generated by the HS and passed to the
-/// tessellator and DS. The buffers are grown on demand by TessellationStages.
-struct TessellationThreadLocalData
-{
- SWR_HS_CONTEXT hsContext; // hull shader context passed to pfnHsFunc
- void* pTxCtx; // memory handed to TSInitCtx for the tessellator context
- size_t tsCtxSize; // size passed to TSInitCtx together with pTxCtx; presumably updated by TSInitCtx with the required size - TODO confirm
-
- uint8_t* pHSOutput; // HS output buffer, read back as ScalarPatch entries
- size_t hsOutputAllocSize; // current allocation size of pHSOutput in bytes
-
- simdscalar* pDSOutput; // DS output buffer, consumed by the tessellation PA
- size_t dsOutputAllocSize; // current allocation size of pDSOutput in bytes
-};
-
-THREAD TessellationThreadLocalData* gt_pTessellationThreadData = nullptr; // allocated on first use by AllocateTessellationData
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Allocate tessellation data for this worker thread.
-///        Does nothing if the data already exists. On allocation failure
-///        gt_pTessellationThreadData is left null instead of being written through.
-/// @param pContext - pointer to SWR context (not used by this function).
-INLINE
-static void AllocateTessellationData(SWR_CONTEXT* pContext)
-{
-    /// @TODO - Don't use thread local storage. Use Worker local storage instead.
-    if (gt_pTessellationThreadData != nullptr)
-    {
-        return; // already allocated
-    }
-
-    void* pMem = AlignedMalloc(sizeof(TessellationThreadLocalData), 64);
-    SWR_ASSERT(pMem != nullptr);
-    if (pMem == nullptr)
-    {
-        // Never memset through a failed allocation; callers assert on the pointer.
-        return;
-    }
-
-    // Zeroed so the buffer pointers and sizes start out as null / 0
-    memset(pMem, 0, sizeof(TessellationThreadLocalData));
-    gt_pTessellationThreadData = (TessellationThreadLocalData*)pMem;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Implements Tessellation Stages: runs the HS on the assembled input patches, then for each patch runs the tessellator and the DS and forwards the resulting prims to the GS stage, or to stream-out and/or the clipper.
-/// @param pDC - pointer to draw context.
-/// @param workerId - thread's worker id. Each thread has a unique id.
-/// @param pa - The primitive assembly object.
-/// @param pGsBuffers - GS scratch buffers, forwarded to GeometryShaderStage when a GS is present.
-template <typename HasGeometryShaderT, typename HasStreamOutT, typename HasRastT>
-static void TessellationStages(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- PA_STATE& pa,
- GsBuffers* pGsBuffers,
- uint32_t* pSoPrimData, // passed through to StreamOut() / GeometryShaderStage
-#if USE_SIMD16_FRONTEND
- uint32_t numPrims_simd8, // number of input prims handled by this call (simd16 frontend only)
-#endif
- simdscalari const& primID) // primitive IDs of the input prims, read per lane below
-{
- const API_STATE& state = GetApiState(pDC);
- const SWR_TS_STATE& tsState = state.tsState;
- void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
-
- SWR_ASSERT(gt_pTessellationThreadData);
-
- HANDLE tsCtx = TSInitCtx(tsState.domain,
- tsState.partitioning,
- tsState.tsOutputTopology,
- gt_pTessellationThreadData->pTxCtx,
- gt_pTessellationThreadData->tsCtxSize);
- if (tsCtx == nullptr) // first attempt failed: allocate tsCtxSize bytes and retry
- {
- gt_pTessellationThreadData->pTxCtx = // NOTE(review): a previous pTxCtx is overwritten without being freed - confirm this cannot leak
- AlignedMalloc(gt_pTessellationThreadData->tsCtxSize, 64);
- tsCtx = TSInitCtx(tsState.domain,
- tsState.partitioning,
- tsState.tsOutputTopology,
- gt_pTessellationThreadData->pTxCtx,
- gt_pTessellationThreadData->tsCtxSize);
- }
- SWR_ASSERT(tsCtx);
-
-#if USE_SIMD16_FRONTEND
- PFN_PROCESS_PRIMS_SIMD16 pfnClipFunc = nullptr;
- if (HasRastT::value)
- {
- switch (tsState.postDSTopology)
- {
- case TOP_TRIANGLE_LIST:
- pfnClipFunc = ClipTriangles_simd16;
- break;
- case TOP_LINE_LIST:
- pfnClipFunc = ClipLines_simd16;
- break;
- case TOP_POINT_LIST:
- pfnClipFunc = ClipPoints_simd16;
- break;
- default:
- SWR_INVALID("Unexpected DS output topology: %d", tsState.postDSTopology);
- }
- }
-
-#else
- PFN_PROCESS_PRIMS pfnClipFunc = nullptr;
- if (HasRastT::value)
- {
- switch (tsState.postDSTopology)
- {
- case TOP_TRIANGLE_LIST:
- pfnClipFunc = ClipTriangles;
- break;
- case TOP_LINE_LIST:
- pfnClipFunc = ClipLines;
- break;
- case TOP_POINT_LIST:
- pfnClipFunc = ClipPoints;
- break;
- default:
- SWR_INVALID("Unexpected DS output topology: %d", tsState.postDSTopology);
- }
- }
-
-#endif
- SWR_HS_CONTEXT& hsContext = gt_pTessellationThreadData->hsContext;
- hsContext.PrimitiveID = primID;
- hsContext.outputSize = tsState.hsAllocationSize;
-
- uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, false);
- // Max storage for one attribute for an entire simdprimitive
- simdvector simdattrib[MAX_NUM_VERTS_PER_PRIM];
-
- // Assemble position separately
- // TESS_TODO: this could be avoided - fix it
- pa.Assemble(VERTEX_POSITION_SLOT, simdattrib);
- for (uint32_t i = 0; i < numVertsPerPrim; ++i) {
- hsContext.vert[i].attrib[VERTEX_POSITION_SLOT] = simdattrib[i];
- }
-
- // assemble all attributes for the input primitives
- for (uint32_t slot = 0; slot < tsState.numHsInputAttribs; ++slot)
- {
- uint32_t attribSlot = tsState.srcVertexAttribOffset + slot;
- pa.Assemble(attribSlot, simdattrib);
-
- for (uint32_t i = 0; i < numVertsPerPrim; ++i)
- {
- hsContext.vert[i].attrib[tsState.vertexAttribOffset + slot] = simdattrib[i]; // source slot is srcVertexAttribOffset-based, HS input slot is vertexAttribOffset-based
- }
- }
-
- // Allocate HS output storage
- uint32_t requiredAllocSize = KNOB_SIMD_WIDTH * tsState.hsAllocationSize;
-
- if (requiredAllocSize > gt_pTessellationThreadData->hsOutputAllocSize) // grow only; the buffer is kept for later calls
- {
- AlignedFree(gt_pTessellationThreadData->pHSOutput);
- gt_pTessellationThreadData->pHSOutput = (uint8_t*)AlignedMalloc(requiredAllocSize, 64);
- gt_pTessellationThreadData->hsOutputAllocSize = requiredAllocSize;
- }
-
- hsContext.pCPout = (ScalarPatch*)gt_pTessellationThreadData->pHSOutput;
-
-#if defined(_DEBUG)
- //memset(hsContext.pCPout, 0x90, sizeof(ScalarPatch) * KNOB_SIMD_WIDTH);
-#endif
- memset(hsContext.pCPout, 0x90, sizeof(ScalarPatch) * KNOB_SIMD_WIDTH); // NOTE(review): runs in all builds and is sized by sizeof(ScalarPatch), while the buffer is sized by hsAllocationSize - confirm the two agree
-
-#if USE_SIMD16_FRONTEND
- uint32_t numPrims = numPrims_simd8;
-#else
- uint32_t numPrims = pa.NumPrims();
-#endif
- hsContext.mask = GenerateMask(numPrims);
-
- // Run the HS
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEHullShader, pDC->drawId);
- state.pfnHsFunc(GetPrivateState(pDC), pWorkerData, &hsContext);
- RDTSC_END(pDC->pContext->pBucketMgr, FEHullShader, 0);
-
- UPDATE_STAT_FE(HsInvocations, numPrims);
- AR_EVENT(HSStats((HANDLE)&hsContext.stats));
-
- const uint32_t* pPrimId = (const uint32_t*)&primID; // scalar view of primID, indexed by patch below
-
- for (uint32_t p = 0; p < numPrims; ++p)
- {
- ScalarPatch* pCPout = (ScalarPatch*)(gt_pTessellationThreadData->pHSOutput + tsState.hsAllocationSize * p);
-
- SWR_TESSELLATION_FACTORS tessFactors;
- tessFactors = hsContext.pCPout[p].tessFactors; // NOTE(review): strides by sizeof(ScalarPatch) whereas pCPout above strides by hsAllocationSize - confirm both address the same patch
-
- // Run Tessellator
- SWR_TS_TESSELLATED_DATA tsData = {0};
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FETessellation, pDC->drawId);
- TSTessellate(tsCtx, tessFactors, tsData);
- AR_EVENT(TessPrimCount(1));
- RDTSC_END(pDC->pContext->pBucketMgr, FETessellation, 0);
-
- if (tsData.NumPrimitives == 0) // nothing generated for this patch
- {
- continue;
- }
- SWR_ASSERT(tsData.NumDomainPoints);
-
- // Allocate DS Output memory
- uint32_t requiredDSVectorInvocations =
- AlignUp(tsData.NumDomainPoints, KNOB_SIMD_WIDTH) / KNOB_SIMD_WIDTH;
-#if USE_SIMD16_FRONTEND
- size_t requiredAllocSize = sizeof(simdvector) * RoundUpEven(requiredDSVectorInvocations) *
- tsState.dsAllocationSize; // simd8 -> simd16, padding
-#else
- size_t requiredDSOutputVectors = requiredDSVectorInvocations * tsState.dsAllocationSize;
- size_t requiredAllocSize = sizeof(simdvector) * requiredDSOutputVectors;
-#endif
- if (requiredAllocSize > gt_pTessellationThreadData->dsOutputAllocSize)
- {
- AlignedFree(gt_pTessellationThreadData->pDSOutput);
- gt_pTessellationThreadData->pDSOutput =
- (simdscalar*)AlignedMalloc(requiredAllocSize, 64);
- gt_pTessellationThreadData->dsOutputAllocSize = requiredAllocSize;
- }
- SWR_ASSERT(gt_pTessellationThreadData->pDSOutput);
- SWR_ASSERT(gt_pTessellationThreadData->dsOutputAllocSize >= requiredAllocSize);
-
-#if defined(_DEBUG)
- memset(gt_pTessellationThreadData->pDSOutput, 0x90, requiredAllocSize);
-#endif
-
- // Run Domain Shader
- SWR_DS_CONTEXT dsContext;
- dsContext.PrimitiveID = pPrimId[p];
- dsContext.pCpIn = pCPout;
- dsContext.pDomainU = (simdscalar*)tsData.pDomainPointsU;
- dsContext.pDomainV = (simdscalar*)tsData.pDomainPointsV;
- dsContext.pOutputData = gt_pTessellationThreadData->pDSOutput;
- dsContext.outVertexAttribOffset = tsState.dsOutVtxAttribOffset;
-#if USE_SIMD16_FRONTEND
- dsContext.vectorStride = RoundUpEven(requiredDSVectorInvocations); // simd8 -> simd16
-#else
- dsContext.vectorStride = requiredDSVectorInvocations;
-#endif
-
- uint32_t dsInvocations = 0;
-
- for (dsContext.vectorOffset = 0; dsContext.vectorOffset < requiredDSVectorInvocations;
- ++dsContext.vectorOffset)
- {
- dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations); // mask is built from the domain points still left to shade
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEDomainShader, pDC->drawId);
- state.pfnDsFunc(GetPrivateState(pDC), pWorkerData, &dsContext);
- RDTSC_END(pDC->pContext->pBucketMgr, FEDomainShader, 0);
-
- AR_EVENT(DSStats((HANDLE)&dsContext.stats));
-
- dsInvocations += KNOB_SIMD_WIDTH;
- }
- UPDATE_STAT_FE(DsInvocations, tsData.NumDomainPoints);
-
-#if USE_SIMD16_FRONTEND
- SWR_ASSERT(IsEven(dsContext.vectorStride)); // simd8 -> simd16
-
-#endif
- PA_TESS tessPa(
- pDC,
-#if USE_SIMD16_FRONTEND
- reinterpret_cast<const simd16scalar*>(dsContext.pOutputData), // simd8 -> simd16
- dsContext.vectorStride / 2, // simd8 -> simd16
-#else
- dsContext.pOutputData,
- dsContext.vectorStride,
-#endif
- SWR_VTX_NUM_SLOTS,
- tsState.numDsOutputAttribs + tsState.dsOutVtxAttribOffset,
- tsData.ppIndices,
- tsData.NumPrimitives,
- tsState.postDSTopology,
- NumVertsPerPrim(tsState.postDSTopology, false));
-
- while (tessPa.HasWork())
- {
-#if USE_SIMD16_FRONTEND
- const uint32_t numPrims = tessPa.NumPrims(); // note: shadows the outer numPrims (input patch count) inside this loop
- const uint32_t numPrims_lo = std::min<uint32_t>(numPrims, KNOB_SIMD_WIDTH);
- const uint32_t numPrims_hi =
- std::max<uint32_t>(numPrims, KNOB_SIMD_WIDTH) - KNOB_SIMD_WIDTH; // prims beyond the first KNOB_SIMD_WIDTH; 0 if none
-
- const simd16scalari primID = _simd16_set1_epi32(dsContext.PrimitiveID);
- const simdscalari primID_lo = _simd16_extract_si(primID, 0);
- const simdscalari primID_hi = _simd16_extract_si(primID, 1);
-
-#endif
- if (HasGeometryShaderT::value)
- {
-#if USE_SIMD16_FRONTEND
- tessPa.useAlternateOffset = false;
- GeometryShaderStage<HasStreamOutT, HasRastT>(
- pDC, workerId, tessPa, pGsBuffers, pSoPrimData, numPrims_lo, primID_lo);
-
- if (numPrims_hi)
- {
- tessPa.useAlternateOffset = true;
- GeometryShaderStage<HasStreamOutT, HasRastT>(
- pDC, workerId, tessPa, pGsBuffers, pSoPrimData, numPrims_hi, primID_hi);
- }
-#else
- GeometryShaderStage<HasStreamOutT, HasRastT>(
- pDC,
- workerId,
- tessPa,
- pGsBuffers,
- pSoPrimData,
- _simd_set1_epi32(dsContext.PrimitiveID));
-#endif
- }
- else
- {
- if (HasStreamOutT::value)
- {
-#if ENABLE_AVX512_SIMD16
- tessPa.useAlternateOffset = false;
-#endif
- StreamOut(pDC, tessPa, workerId, pSoPrimData, 0);
- }
-
- if (HasRastT::value)
- {
-#if USE_SIMD16_FRONTEND
- simd16vector prim_simd16[3]; // Only deal with triangles, lines, or points
-#else
- simdvector prim[3]; // Only deal with triangles, lines, or points
-#endif
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEPAAssemble, pDC->drawId);
- bool assemble =
-#if USE_SIMD16_FRONTEND
- tessPa.Assemble(VERTEX_POSITION_SLOT, prim_simd16);
-#else
- tessPa.Assemble(VERTEX_POSITION_SLOT, prim);
-#endif
- RDTSC_END(pDC->pContext->pBucketMgr, FEPAAssemble, 1);
- SWR_ASSERT(assemble);
-
- SWR_ASSERT(pfnClipFunc);
-#if USE_SIMD16_FRONTEND
- // Gather data from the SGV if provided.
- simd16scalari vViewportIdx = SIMD16::setzero_si();
- simd16scalari vRtIdx = SIMD16::setzero_si();
- SIMD16::Vec4 svgAttrib[4] = {SIMD16::setzero_ps()};
-
- if (state.backendState.readViewportArrayIndex ||
- state.backendState.readRenderTargetArrayIndex)
- {
- tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
- }
-
- if (state.backendState.readViewportArrayIndex)
- {
- vViewportIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
- tessPa.viewportArrayActive = true;
- }
- if (state.backendState.readRenderTargetArrayIndex)
- {
- vRtIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
- tessPa.rtArrayActive = true;
- }
-
-
- {
- // OOB VPAI indices => forced to zero.
- vViewportIdx = SIMD16::max_epi32(vViewportIdx, SIMD16::setzero_si());
- simd16scalari vNumViewports =
- SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simd16scalari vClearMask = SIMD16::cmplt_epi32(vViewportIdx, vNumViewports);
- vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx);
-
- tessPa.useAlternateOffset = false;
- pfnClipFunc(pDC,
- tessPa,
- workerId,
- prim_simd16,
- GenMask(numPrims),
- primID,
- vViewportIdx,
- vRtIdx);
- }
-#else
- // Gather data from the SGV if provided.
- simdscalari vViewportIdx = SIMD::setzero_si();
- simdscalari vRtIdx = SIMD::setzero_si();
- SIMD::Vec4 svgAttrib[4]; // only read below when one of the read*ArrayIndex flags is set
-
- if (state.backendState.readViewportArrayIndex ||
- state.backendState.readRenderTargetArrayIndex)
- {
- tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
- }
-
- if (state.backendState.readViewportArrayIndex)
- {
- vViewportIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
-
- // OOB VPAI indices => forced to zero.
- vViewportIdx = SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
- simdscalari vNumViewports = SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simdscalari vClearMask = SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
- vViewportIdx = SIMD::and_si(vClearMask, vViewportIdx);
- tessPa.viewportArrayActive = true;
- }
- if (state.backendState.readRenderTargetArrayIndex)
- {
- vRtIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
- tessPa.rtArrayActive = true;
- }
- pfnClipFunc(pDC,
- tessPa,
- workerId,
- prim,
- GenMask(tessPa.NumPrims()),
- _simd_set1_epi32(dsContext.PrimitiveID),
- vViewportIdx,
- vRtIdx);
-#endif
- }
- }
-
- tessPa.NextPrim();
-
- } // while (tessPa.HasWork())
- } // for (uint32_t p = 0; p < numPrims; ++p)
-
-#if USE_SIMD16_FRONTEND
- if (gt_pTessellationThreadData->pDSOutput != nullptr) // simd16 frontend releases the DS output buffer after every call
- {
- AlignedFree(gt_pTessellationThreadData->pDSOutput);
- gt_pTessellationThreadData->pDSOutput = nullptr;
- }
- gt_pTessellationThreadData->dsOutputAllocSize = 0;
-
-#endif
- TSDestroyCtx(tsCtx);
-}
-
-THREAD PA_STATE::SIMDVERTEX* gpVertexStore = nullptr; // vertex store handed to the primitive assembler; (re)allocated by ProcessDraw when too small
-THREAD uint32_t gVertexStoreSize = 0; // current size of gpVertexStore in bytes
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief FE handler for SwrDraw.
-/// @tparam IsIndexedT - Is indexed drawing enabled
-/// @tparam HasTessellationT - Is tessellation enabled
-/// @tparam HasGeometryShaderT::value - Is the geometry shader stage enabled
-/// @tparam HasStreamOutT - Is stream-out enabled
-/// @tparam HasRastT - Is rasterization enabled
-/// @param pContext - pointer to SWR context.
-/// @param pDC - pointer to draw context.
-/// @param workerId - thread's worker id.
-/// @param pUserData - Pointer to DRAW_WORK
-template <typename IsIndexedT,
- typename IsCutIndexEnabledT,
- typename HasTessellationT,
- typename HasGeometryShaderT,
- typename HasStreamOutT,
- typename HasRastT>
-void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
-{
-#if KNOB_ENABLE_TOSS_POINTS
- if (KNOB_TOSS_QUEUE_FE)
- {
- return;
- }
-#endif
-
- RDTSC_BEGIN(pContext->pBucketMgr, FEProcessDraw, pDC->drawId);
-
- void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
-
- DRAW_WORK& work = *(DRAW_WORK*)pUserData;
- const API_STATE& state = GetApiState(pDC);
-
- uint32_t indexSize = 0;
- uint32_t endVertex = work.numVerts;
-
- gfxptr_t xpLastRequestedIndex = 0;
- if (IsIndexedT::value)
- {
- switch (work.type)
- {
- case R32_UINT:
- indexSize = sizeof(uint32_t);
- break;
- case R16_UINT:
- indexSize = sizeof(uint16_t);
- break;
- case R8_UINT:
- indexSize = sizeof(uint8_t);
- break;
- default:
- SWR_INVALID("Invalid work.type: %d", work.type);
- }
- xpLastRequestedIndex = work.xpIB + endVertex * indexSize;
- }
- else
- {
- // No cuts, prune partial primitives.
- endVertex = GetNumVerts(state.topology, GetNumPrims(state.topology, work.numVerts));
- }
-
-#if defined(KNOB_ENABLE_RDTSC) || defined(KNOB_ENABLE_AR)
- uint32_t numPrims = GetNumPrims(state.topology, work.numVerts);
-#endif
-
- GsBuffers gsBuffers;
- if (HasGeometryShaderT::value)
- {
-#if USE_SIMD16_FRONTEND
- AllocateGsBuffers<SIMD512, KNOB_SIMD16_WIDTH>(
- pDC, state, NumVertsPerPrim(state.topology, true), &gsBuffers);
-#else
- AllocateGsBuffers<SIMD256, KNOB_SIMD_WIDTH>(
- pDC, state, NumVertsPerPrim(state.topology, true), &gsBuffers);
-#endif
- }
-
- if (HasTessellationT::value)
- {
- SWR_ASSERT(state.tsState.tsEnable == true);
- SWR_ASSERT(state.pfnHsFunc != nullptr);
- SWR_ASSERT(state.pfnDsFunc != nullptr);
-
- AllocateTessellationData(pContext);
- }
- else
- {
- SWR_ASSERT(state.tsState.tsEnable == false);
- SWR_ASSERT(state.pfnHsFunc == nullptr);
- SWR_ASSERT(state.pfnDsFunc == nullptr);
- }
-
- // allocate space for streamout input prim data
- uint32_t* pSoPrimData = nullptr;
- if (HasStreamOutT::value)
- {
- pSoPrimData = (uint32_t*)pDC->pArena->AllocAligned(4096, 16);
- }
-
- const uint32_t vertexCount = NumVertsPerPrim(state.topology, true);
-#if USE_SIMD16_FRONTEND
- uint32_t simdVertexSizeBytes = state.frontendState.vsVertexSize * sizeof(simd16vector);
-#else
- uint32_t simdVertexSizeBytes = state.frontendState.vsVertexSize * sizeof(simdvector);
-#endif
-
- SWR_ASSERT(vertexCount <= MAX_NUM_VERTS_PER_PRIM);
-
- // Compute storage requirements for vertex store
- // TODO: allocation needs to be rethought for better cut support
- uint32_t numVerts = vertexCount + 2; // Need extra space for PA state machine
- uint32_t vertexStoreSize = numVerts * simdVertexSizeBytes;
-
- // grow the vertex store for the PA as necessary
- if (gVertexStoreSize < vertexStoreSize)
- {
- if (gpVertexStore != nullptr)
- {
- AlignedFree(gpVertexStore);
- gpVertexStore = nullptr;
- }
-
- SWR_ASSERT(gpVertexStore == nullptr);
-
- gpVertexStore = reinterpret_cast<PA_STATE::SIMDVERTEX*>(AlignedMalloc(vertexStoreSize, 64));
- gVertexStoreSize = vertexStoreSize;
-
- SWR_ASSERT(gpVertexStore != nullptr);
- }
-
- // choose primitive assembler
-
- PA_FACTORY<IsIndexedT, IsCutIndexEnabledT> paFactory(pDC,
- state.topology,
- work.numVerts,
- gpVertexStore,
- numVerts,
- state.frontendState.vsVertexSize,
- GetNumVerts(state.topology, 1));
- PA_STATE& pa = paFactory.GetPA();
-
-#if USE_SIMD16_FRONTEND
-#if USE_SIMD16_SHADERS
- simd16vertex vin;
-#else
- simdvertex vin_lo;
- simdvertex vin_hi;
-#endif
- SWR_VS_CONTEXT vsContext_lo;
- SWR_VS_CONTEXT vsContext_hi;
-
-#if USE_SIMD16_SHADERS
- vsContext_lo.pVin = reinterpret_cast<simdvertex*>(&vin);
- vsContext_hi.pVin = reinterpret_cast<simdvertex*>(&vin);
-#else
- vsContext_lo.pVin = &vin_lo;
- vsContext_hi.pVin = &vin_hi;
-#endif
- vsContext_lo.AlternateOffset = 0;
- vsContext_hi.AlternateOffset = 1;
-
- SWR_FETCH_CONTEXT fetchInfo_lo = {0};
-
- fetchInfo_lo.pStreams = &state.vertexBuffers[0];
- fetchInfo_lo.StartInstance = work.startInstance;
- fetchInfo_lo.StartVertex = 0;
-
- if (IsIndexedT::value)
- {
- fetchInfo_lo.BaseVertex = work.baseVertex;
-
- // if the entire index buffer isn't being consumed, set the last index
- // so that fetches < a SIMD wide will be masked off
- fetchInfo_lo.xpLastIndex = state.indexBuffer.xpIndices + state.indexBuffer.size;
- if (xpLastRequestedIndex < fetchInfo_lo.xpLastIndex)
- {
- fetchInfo_lo.xpLastIndex = xpLastRequestedIndex;
- }
- }
- else
- {
- fetchInfo_lo.StartVertex = work.startVertex;
- }
-
- SWR_FETCH_CONTEXT fetchInfo_hi = fetchInfo_lo;
-
- const simd16scalari vScale =
- _simd16_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-
- for (uint32_t instanceNum = 0; instanceNum < work.numInstances; instanceNum++)
- {
- uint32_t i = 0;
-
- simd16scalari vIndex;
-
- if (IsIndexedT::value)
- {
- fetchInfo_lo.xpIndices = work.xpIB;
- fetchInfo_hi.xpIndices =
- fetchInfo_lo.xpIndices + KNOB_SIMD_WIDTH * indexSize; // 1/2 of KNOB_SIMD16_WIDTH
- }
- else
- {
- vIndex = _simd16_add_epi32(_simd16_set1_epi32(work.startVertexID), vScale);
-
- fetchInfo_lo.xpIndices = pDC->pContext->pfnMakeGfxPtr(GetPrivateState(pDC), &vIndex);
-
- int32_t* sysAddr = reinterpret_cast<int32_t*>(&vIndex);
- sysAddr += KNOB_SIMD_WIDTH; // 1/2 of KNOB_SIMD16_WIDTH
-
- fetchInfo_hi.xpIndices = pDC->pContext->pfnMakeGfxPtr(GetPrivateState(pDC), sysAddr);
- }
-
- fetchInfo_lo.CurInstance = instanceNum;
- fetchInfo_hi.CurInstance = instanceNum;
-
- vsContext_lo.InstanceID = instanceNum;
- vsContext_hi.InstanceID = instanceNum;
-
- while (pa.HasWork())
- {
- // GetNextVsOutput currently has the side effect of updating some PA state machine
- // state. So we need to keep this outside of (i < endVertex) check.
-
- simdmask* pvCutIndices_lo = nullptr;
- simdmask* pvCutIndices_hi = nullptr;
-
- if (IsIndexedT::value)
- {
- // simd16mask <=> simdmask[2]
-
- pvCutIndices_lo = &reinterpret_cast<simdmask*>(&pa.GetNextVsIndices())[0];
- pvCutIndices_hi = &reinterpret_cast<simdmask*>(&pa.GetNextVsIndices())[1];
- }
-
- simd16vertex& vout = pa.GetNextVsOutput();
-
- vsContext_lo.pVout = reinterpret_cast<simdvertex*>(&vout);
- vsContext_hi.pVout = reinterpret_cast<simdvertex*>(&vout);
-
- if (i < endVertex)
- {
- if (!IsIndexedT::value)
- {
- fetchInfo_lo.xpLastIndex = fetchInfo_lo.xpIndices;
- uint32_t offset;
- offset = std::min(endVertex - i, (uint32_t)KNOB_SIMD16_WIDTH);
- offset *= 4; // convert from index to address
-#if USE_SIMD16_SHADERS
- fetchInfo_lo.xpLastIndex += offset;
-#else
- fetchInfo_lo.xpLastIndex += std::min(offset, (uint32_t)KNOB_SIMD_WIDTH);
- uint32_t offset2 =
- std::min(offset, (uint32_t)KNOB_SIMD16_WIDTH) - KNOB_SIMD_WIDTH;
- assert(offset >= 0);
- fetchInfo_hi.xpLastIndex = fetchInfo_hi.xpIndices;
- fetchInfo_hi.xpLastIndex += offset2;
-#endif
- }
- // 1. Execute FS/VS for a single SIMD.
- RDTSC_BEGIN(pContext->pBucketMgr, FEFetchShader, pDC->drawId);
-#if USE_SIMD16_SHADERS
- state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin);
-#else
- state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin_lo);
-
- if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
- {
- state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_hi, vin_hi);
- }
-#endif
- RDTSC_END(pContext->pBucketMgr, FEFetchShader, 0);
-
- // forward fetch generated vertex IDs to the vertex shader
-#if USE_SIMD16_SHADERS
-#if USE_SIMD16_VS
- vsContext_lo.VertexID16 =
- _simd16_insert_si(vsContext_lo.VertexID16, fetchInfo_lo.VertexID, 0);
- vsContext_lo.VertexID16 =
- _simd16_insert_si(vsContext_lo.VertexID16, fetchInfo_lo.VertexID2, 1);
-#else
- vsContext_lo.VertexID = fetchInfo_lo.VertexID;
- vsContext_hi.VertexID = fetchInfo_lo.VertexID2;
-#endif
-#else
- vsContext_lo.VertexID = fetchInfo_lo.VertexID;
- vsContext_hi.VertexID = fetchInfo_hi.VertexID;
-#endif
-
- // Setup active mask for vertex shader.
-#if USE_SIMD16_VS
- vsContext_lo.mask16 = GenerateMask16(endVertex - i);
-#else
- vsContext_lo.mask = GenerateMask(endVertex - i);
- vsContext_hi.mask = GenerateMask(endVertex - (i + KNOB_SIMD_WIDTH));
-#endif
-
- // forward cut mask to the PA
- if (IsIndexedT::value)
- {
-#if USE_SIMD16_SHADERS
- *pvCutIndices_lo = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask));
- *pvCutIndices_hi = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask2));
-#else
- *pvCutIndices_lo = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_lo.CutMask));
- *pvCutIndices_hi = _simd_movemask_ps(_simd_castsi_ps(fetchInfo_hi.CutMask));
-#endif
- }
-
- UPDATE_STAT_FE(IaVertices, GetNumInvocations(i, endVertex));
-
-#if KNOB_ENABLE_TOSS_POINTS
- if (!KNOB_TOSS_FETCH)
-#endif
- {
- RDTSC_BEGIN(pContext->pBucketMgr, FEVertexShader, pDC->drawId);
-#if USE_SIMD16_VS
- state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
- AR_EVENT(VSStats((HANDLE)&vsContext_lo.stats));
-#else
- state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
- AR_EVENT(VSStats((HANDLE)&vsContext_lo.stats));
-
- if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
- {
- state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_hi);
- AR_EVENT(VSStats((HANDLE)&vsContext_hi.stats));
- }
-#endif
- RDTSC_END(pContext->pBucketMgr, FEVertexShader, 0);
-
- UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
- }
- }
-
- // 2. Assemble primitives given the last two SIMD.
- do
- {
- simd16vector prim_simd16[MAX_NUM_VERTS_PER_PRIM];
-
- RDTSC_START(pContext->pBucketMgr, FEPAAssemble);
- bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim_simd16);
- RDTSC_STOP(pContext->pBucketMgr, FEPAAssemble, 1, 0);
-
-#if KNOB_ENABLE_TOSS_POINTS
- if (!KNOB_TOSS_FETCH)
-#endif
- {
-#if KNOB_ENABLE_TOSS_POINTS
- if (!KNOB_TOSS_VS)
-#endif
- {
- if (assemble)
- {
- UPDATE_STAT_FE(IaPrimitives, pa.NumPrims());
-
- const uint32_t numPrims = pa.NumPrims();
- const uint32_t numPrims_lo =
- std::min<uint32_t>(numPrims, KNOB_SIMD_WIDTH);
- const uint32_t numPrims_hi =
- std::max<uint32_t>(numPrims, KNOB_SIMD_WIDTH) - KNOB_SIMD_WIDTH;
-
- const simd16scalari primID = pa.GetPrimID(work.startPrimID);
- const simdscalari primID_lo = _simd16_extract_si(primID, 0);
- const simdscalari primID_hi = _simd16_extract_si(primID, 1);
-
- if (HasTessellationT::value)
- {
- pa.useAlternateOffset = false;
- TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(
- pDC,
- workerId,
- pa,
- &gsBuffers,
- pSoPrimData,
- numPrims_lo,
- primID_lo);
-
- if (numPrims_hi)
- {
- pa.useAlternateOffset = true;
- TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(
- pDC,
- workerId,
- pa,
- &gsBuffers,
- pSoPrimData,
- numPrims_hi,
- primID_hi);
- }
- }
- else if (HasGeometryShaderT::value)
- {
- pa.useAlternateOffset = false;
- GeometryShaderStage<HasStreamOutT, HasRastT>(pDC,
- workerId,
- pa,
- &gsBuffers,
- pSoPrimData,
- numPrims_lo,
- primID_lo);
-
- if (numPrims_hi)
- {
- pa.useAlternateOffset = true;
- GeometryShaderStage<HasStreamOutT, HasRastT>(pDC,
- workerId,
- pa,
- &gsBuffers,
- pSoPrimData,
- numPrims_hi,
- primID_hi);
- }
- }
- else
- {
- // If streamout is enabled then stream vertices out to memory.
- if (HasStreamOutT::value)
- {
- pa.useAlternateOffset = false;
- StreamOut(pDC, pa, workerId, pSoPrimData, 0);
- }
-
- if (HasRastT::value)
- {
- SWR_ASSERT(pDC->pState->pfnProcessPrims_simd16);
- // Gather data from the SGV slot (VERTEX_SGV_SLOT) if provided.
- simd16scalari vpai = SIMD16::setzero_si();
- simd16scalari rtai = SIMD16::setzero_si();
- SIMD16::Vec4 svgAttrib[4];
-
- if (state.backendState.readViewportArrayIndex ||
- state.backendState.readRenderTargetArrayIndex)
- {
- pa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
- }
-
- if (state.backendState.readViewportArrayIndex)
- {
- vpai = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
- pa.viewportArrayActive = true;
- }
- if (state.backendState.readRenderTargetArrayIndex)
- {
- rtai =
- SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
- pa.rtArrayActive = true;
- }
-
- {
- // OOB VPAI indices => forced to zero.
- vpai = SIMD16::max_epi32(vpai, SIMD16::setzero_si());
- simd16scalari vNumViewports =
- SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simd16scalari vClearMask =
- SIMD16::cmplt_epi32(vpai, vNumViewports);
- vpai = SIMD16::and_si(vClearMask, vpai);
-
- pa.useAlternateOffset = false;
- pDC->pState->pfnProcessPrims_simd16(pDC,
- pa,
- workerId,
- prim_simd16,
- GenMask(numPrims),
- primID,
- vpai,
- rtai);
- }
- }
- }
- }
- }
- }
- } while (pa.NextPrim());
-
- if (IsIndexedT::value)
- {
- fetchInfo_lo.xpIndices = fetchInfo_lo.xpIndices + KNOB_SIMD16_WIDTH * indexSize;
- fetchInfo_hi.xpIndices = fetchInfo_hi.xpIndices + KNOB_SIMD16_WIDTH * indexSize;
- }
- else
- {
- vIndex = _simd16_add_epi32(vIndex, _simd16_set1_epi32(KNOB_SIMD16_WIDTH));
- }
-
- i += KNOB_SIMD16_WIDTH;
- }
-
- pa.Reset();
- }
-
-#else
- SWR_VS_CONTEXT vsContext;
- SWR_FETCH_CONTEXT fetchInfo = {0};
-
- fetchInfo.pStreams = &state.vertexBuffers[0];
- fetchInfo.StartInstance = work.startInstance;
- fetchInfo.StartVertex = 0;
-
- if (IsIndexedT::value)
- {
- fetchInfo.BaseVertex = work.baseVertex;
-
- // if the entire index buffer isn't being consumed, set the last index
- // so that fetches < a SIMD wide will be masked off
- fetchInfo.pLastIndex =
- (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size);
- if (xpLastRequestedIndex < fetchInfo.pLastIndex)
- {
- fetchInfo.pLastIndex = xpLastRequestedIndex;
- }
- }
- else
- {
- fetchInfo.StartVertex = work.startVertex;
- }
-
- const simdscalari vScale = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
-
- /// @todo: temporarily move instance loop in the FE to ensure SO ordering
- for (uint32_t instanceNum = 0; instanceNum < work.numInstances; instanceNum++)
- {
- simdscalari vIndex;
- uint32_t i = 0;
-
- if (IsIndexedT::value)
- {
- fetchInfo.pIndices = work.pIB;
- }
- else
- {
- vIndex = _simd_add_epi32(_simd_set1_epi32(work.startVertexID), vScale);
- fetchInfo.pIndices = (const int32_t*)&vIndex;
- }
-
- fetchInfo.CurInstance = instanceNum;
- vsContext.InstanceID = instanceNum;
-
- while (pa.HasWork())
- {
- // GetNextVsOutput currently has the side effect of updating some PA state machine
- // state. So we need to keep this outside of (i < endVertex) check.
- simdmask* pvCutIndices = nullptr;
- if (IsIndexedT::value)
- {
- pvCutIndices = &pa.GetNextVsIndices();
- }
-
- simdvertex& vout = pa.GetNextVsOutput();
- vsContext.pVin = &vout;
- vsContext.pVout = &vout;
-
- if (i < endVertex)
- {
- // 1. Execute FS/VS for a single SIMD.
- RDTSC_BEGIN(pContext->pBucketMgr, FEFetchShader, pDC->drawId);
- state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo, vout);
- RDTSC_END(pContext->pBucketMgr, FEFetchShader, 0);
-
- // forward fetch generated vertex IDs to the vertex shader
- vsContext.VertexID = fetchInfo.VertexID;
-
- // Setup active mask for vertex shader.
- vsContext.mask = GenerateMask(endVertex - i);
-
- // forward cut mask to the PA
- if (IsIndexedT::value)
- {
- *pvCutIndices = _simd_movemask_ps(_simd_castsi_ps(fetchInfo.CutMask));
- }
-
- UPDATE_STAT_FE(IaVertices, GetNumInvocations(i, endVertex));
-
-#if KNOB_ENABLE_TOSS_POINTS
- if (!KNOB_TOSS_FETCH)
-#endif
- {
- RDTSC_BEGIN(pContext->pBucketMgr, FEVertexShader, pDC->drawId);
- state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext);
- RDTSC_END(pContext->pBucketMgr, FEVertexShader, 0);
-
- UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
- AR_EVENT(VSStats((HANDLE)&vsContext.stats));
- }
- }
-
- // 2. Assemble primitives given the last two SIMD.
- do
- {
- simdvector prim[MAX_NUM_VERTS_PER_PRIM];
- // PaAssemble returns false if there is not enough verts to assemble.
- RDTSC_BEGIN(pContext->pBucketMgr, FEPAAssemble, pDC->drawId);
- bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim);
- RDTSC_END(pContext->pBucketMgr, FEPAAssemble, 1);
-
-#if KNOB_ENABLE_TOSS_POINTS
- if (!KNOB_TOSS_FETCH)
-#endif
- {
-#if KNOB_ENABLE_TOSS_POINTS
- if (!KNOB_TOSS_VS)
-#endif
- {
- if (assemble)
- {
- UPDATE_STAT_FE(IaPrimitives, pa.NumPrims());
-
- if (HasTessellationT::value)
- {
- TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(
- pDC,
- workerId,
- pa,
- &gsBuffers,
- pSoPrimData,
- pa.GetPrimID(work.startPrimID));
- }
- else if (HasGeometryShaderT::value)
- {
- GeometryShaderStage<HasStreamOutT, HasRastT>(
- pDC,
- workerId,
- pa,
- &gsBuffers,
- pSoPrimData,
- pa.GetPrimID(work.startPrimID));
- }
- else
- {
- // If streamout is enabled then stream vertices out to memory.
- if (HasStreamOutT::value)
- {
- StreamOut(pDC, pa, workerId, pSoPrimData, 0);
- }
-
- if (HasRastT::value)
- {
- SWR_ASSERT(pDC->pState->pfnProcessPrims);
-
- // Gather data from the SGV slot (VERTEX_SGV_SLOT) if provided.
- simdscalari vViewportIdx = SIMD::setzero_si();
- simdscalari vRtIdx = SIMD::setzero_si();
- SIMD::Vec4 svgAttrib[4];
-
- if (state.backendState.readViewportArrayIndex ||
- state.backendState.readRenderTargetArrayIndex)
- {
- pa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
- }
-
- if (state.backendState.readViewportArrayIndex)
- {
- vViewportIdx =
- SIMD::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
-
- // OOB VPAI indices => forced to zero.
- vViewportIdx =
- SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
- simdscalari vNumViewports =
- SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simdscalari vClearMask =
- SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
- vViewportIdx = SIMD::and_si(vClearMask, vViewportIdx);
- pa.viewportArrayActive = true;
- }
- if (state.backendState.readRenderTargetArrayIndex)
- {
- vRtIdx =
- SIMD::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
- pa.rtArrayActive = true;
- }
-
- pDC->pState->pfnProcessPrims(pDC,
- pa,
- workerId,
- prim,
- GenMask(pa.NumPrims()),
- pa.GetPrimID(work.startPrimID),
- vViewportIdx,
- vRtIdx);
- }
- }
- }
- }
- }
- } while (pa.NextPrim());
-
- if (IsIndexedT::value)
- {
- fetchInfo.pIndices =
- (int*)((uint8_t*)fetchInfo.pIndices + KNOB_SIMD_WIDTH * indexSize);
- }
- else
- {
- vIndex = _simd_add_epi32(vIndex, _simd_set1_epi32(KNOB_SIMD_WIDTH));
- }
-
- i += KNOB_SIMD_WIDTH;
- }
- pa.Reset();
- }
-
-#endif
-
- RDTSC_END(pContext->pBucketMgr, FEProcessDraw, numPrims * work.numInstances);
-}
-
-// Chooser policy handed to TemplateArgUnroller: publishes the function-pointer type as
-// FuncType and returns the ProcessDraw instantiation for a given pack of template args.
-struct FEDrawChooser
-{
-    using FuncType = PFN_FE_WORK_FUNC;
-
-    template <typename... DrawTraits>
-    static FuncType GetFunc()
-    {
-        FuncType pfnDraw = ProcessDraw<DrawTraits...>;
-        return pfnDraw;
-    }
-};
-
-// Selects the templated Draw front-end function for a draw: the six runtime flags are
-// forwarded, in order, to TemplateArgUnroller<FEDrawChooser>::GetFunc and its result
-// is returned unchanged.
-PFN_FE_WORK_FUNC GetProcessDrawFunc(bool IsIndexed,
-                                    bool IsCutIndexEnabled,
-                                    bool HasTessellation,
-                                    bool HasGeometryShader,
-                                    bool HasStreamOut,
-                                    bool HasRasterization)
-{
-    using DrawFuncUnroller = TemplateArgUnroller<FEDrawChooser>;
-
-    PFN_FE_WORK_FUNC pfnProcessDraw = DrawFuncUnroller::GetFunc(
-        IsIndexed, IsCutIndexEnabled, HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization);
-
-    return pfnProcessDraw;
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h
deleted file mode 100644
index a6d9fb5ba52..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ /dev/null
@@ -1,448 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file frontend.h
- *
- * @brief Definitions for Frontend which handles vertex processing,
- * primitive assembly, clipping, binning, etc.
- *
- ******************************************************************************/
-#pragma once
-#include "context.h"
-#include "common/simdintrin.h"
-#include <type_traits>
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Helper function to generate a bitmask with the low numBits bits set.
-/// @param numBits - number of low-order bits to set; asserted to be <= 32.
-/// @return Mask with bits [0, numBits) set; numBits == 32 yields 0xFFFFFFFF.
-static INLINE uint32_t
-    GenMask(uint32_t numBits)
-{
-    const uint32_t maxBits = sizeof(uint32_t) * 8;
-
-    SWR_ASSERT(numBits <= maxBits, "Too many bits (%d) for %s", numBits, __FUNCTION__);
-
-    // Shifting a 32-bit value by its full width (or more) is undefined behavior, so the
-    // full-width mask -- which the assert above explicitly permits -- is returned
-    // directly instead of being computed with a shift.
-    if (numBits >= maxBits)
-    {
-        return 0xFFFFFFFFu;
-    }
-
-    return ((1U << numBits) - 1);
-}
-
-// Computes the A and B edge-equation coefficients for the three edges of a triangle.
-//
-// Each edge is a 2D line in standard form Ax + By + C = 0, where
-//     A = y0 - y1
-//     B = x1 - x0
-//     C = x0y1 - x1y0
-// vX / vY carry (v0, v1, v2, don't-care) in lanes 0..3. Only A and B are produced here.
-INLINE
-void triangleSetupAB(const __m128 vX, const __m128 vY, __m128& vA, __m128& vB)
-{
-    // Rotate the three vertex lanes by one: (c0 c1 c2 dc) -> (c1 c2 c0 dc).
-    const __m128 vXNext = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(3, 0, 2, 1));
-    const __m128 vYNext = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(3, 0, 2, 1));
-
-    // A = (y0 - y1, y1 - y2, y2 - y0, dc)
-    vA = _mm_sub_ps(vY, vYNext);
-
-    // B = (x1 - x0, x2 - x1, x0 - x2, dc)
-    vB = _mm_sub_ps(vXNext, vX);
-}
-
-// Integer (32-bit lane) form of the A/B edge coefficient setup:
-//     A = y0 - y1
-//     B = x1 - x0
-// for each of the three edges; C (= x0y1 - x1y0) is not computed here.
-INLINE
-void triangleSetupABInt(const __m128i vX, const __m128i vY, __m128i& vA, __m128i& vB)
-{
-    // Rotate the three vertex lanes by one: (c0 c1 c2 dc) -> (c1 c2 c0 dc).
-    const __m128i vYNext = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 0, 2, 1));
-    const __m128i vXNext = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 0, 2, 1));
-
-    vA = _mm_sub_epi32(vY, vYNext);
-    vB = _mm_sub_epi32(vXNext, vX);
-}
-
-// Array-per-vertex form of the A/B setup: vX[k] / vY[k] hold vertex k's coordinate
-// (presumably one triangle per SIMD lane). For each edge k:
-//     A[k] = y[k]   - y[k+1]
-//     B[k] = x[k+1] - x[k]
-// with the vertex index wrapping from 2 back to 0.
-INLINE
-void triangleSetupABIntVertical(const simdscalari vX[3],
-                                const simdscalari vY[3],
-                                simdscalari (&vA)[3],
-                                simdscalari (&vB)[3])
-{
-    // All A coefficients are written before any B coefficient.
-    for (uint32_t edge = 0; edge < 3; ++edge)
-    {
-        const uint32_t next = (edge + 1) % 3;
-        vA[edge]            = _simd_sub_epi32(vY[edge], vY[next]);
-    }
-
-    for (uint32_t edge = 0; edge < 3; ++edge)
-    {
-        const uint32_t next = (edge + 1) % 3;
-        vB[edge]            = _simd_sub_epi32(vX[next], vX[edge]);
-    }
-}
-
-#if ENABLE_AVX512_SIMD16
-// simd16 overload of the array-per-vertex A/B setup. For each edge k:
-//     A[k] = y[k]   - y[k+1]
-//     B[k] = x[k+1] - x[k]
-// with the vertex index wrapping from 2 back to 0.
-INLINE
-void triangleSetupABIntVertical(const simd16scalari vX[3],
-                                const simd16scalari vY[3],
-                                simd16scalari (&vA)[3],
-                                simd16scalari (&vB)[3])
-{
-    // All A coefficients are written before any B coefficient.
-    for (uint32_t edge = 0; edge < 3; ++edge)
-    {
-        const uint32_t next = (edge + 1) % 3;
-        vA[edge]            = _simd16_sub_epi32(vY[edge], vY[next]);
-    }
-
-    for (uint32_t edge = 0; edge < 3; ++edge)
-    {
-        const uint32_t next = (edge + 1) % 3;
-        vB[edge]            = _simd16_sub_epi32(vX[next], vX[edge]);
-    }
-}
-
-#endif
-// Determinant of the triangle, built from two vectors P and Q between its 3 points:
-//     Px = x0-x2, Py = y0-y2
-//     Qx = x1-x2, Qy = y1-y2
-//
-//           |Px Qx|
-//     det = |     | = PxQy - PyQx = (x0-x2)*(y1-y2) - (y0-y2)*(x1-x2)
-//           |Py Qy|
-//
-// The already-computed A and B coefficients are reused by factoring -1 out of Py and Qx:
-//     det = B[2]*A[1] - (-(y2-y0))*(-(x2-x1))
-//         = B[2]*A[1] - A[2]*B[1]
-//
-// The 64-bit integer result is converted to double, multiplied by
-// 1.0 / FIXED_POINT16_SCALE and returned as a float.
-INLINE
-float calcDeterminantInt(const __m128i vA, const __m128i vB)
-{
-    // Line up the operands of both products in the 32-bit lanes used by the multiply.
-    const __m128i vALanes = _mm_shuffle_epi32(vA, _MM_SHUFFLE(0, 2, 0, 1)); // [A1, A0, A2, A0]
-    const __m128i vBLanes = _mm_shuffle_epi32(vB, _MM_SHUFFLE(0, 1, 0, 2)); // [B2, B0, B1, B0]
-
-    // Two 64-bit products: [A1*B2, A2*B1]
-    const __m128i vProducts = _mm_mul_epi32(vALanes, vBLanes);
-
-    // Copy the upper 64-bit product into the lower half, then subtract:
-    // the low 64 bits become A1*B2 - A2*B1.
-    const __m128i vUpperProduct = _mm_shuffle_epi32(vProducts, _MM_SHUFFLE(3, 2, 3, 2));
-    const __m128i vDet          = _mm_sub_epi64(vProducts, vUpperProduct);
-
-    // Extract the low 64 bits as a signed integer.
-    int64_t det64;
-    _mm_store_sd((double*)&det64, _mm_castsi128_pd(vDet));
-
-    const double detScaled = (double)det64 * (1.0 / FIXED_POINT16_SCALE);
-    return (float)detScaled;
-}
-
-// Per-lane version of calcDeterminantInt: computes A1*B2 - A2*B1 for every lane of the
-// inputs using 64-bit products and a 64-bit subtract. Only vA[1], vA[2], vB[1] and vB[2]
-// are read. Results are written as 64-bit values to pvDet[0] (lanes 0-3) and pvDet[1]
-// (lanes 4-7), per the lane annotations below.
-// NOTE(review): unlike calcDeterminantInt, no FIXED_POINT16_SCALE scaling or float
-// conversion is applied here -- callers get the raw integer determinants.
-INLINE
-void calcDeterminantIntVertical(const simdscalari vA[3],
- const simdscalari vB[3],
- simdscalari* pvDet)
-{
- // refer to calcDeterminantInt comment for calculation explanation
-
- // A1*B2
- // Duplicate each 32-bit lane so every source lane lands in a 64-bit slot for the multiply.
- simdscalari vA1Lo = _simd_unpacklo_epi32(vA[1], vA[1]); // 0 0 1 1 4 4 5 5
- simdscalari vA1Hi = _simd_unpackhi_epi32(vA[1], vA[1]); // 2 2 3 3 6 6 7 7
-
- simdscalari vB2Lo = _simd_unpacklo_epi32(vB[2], vB[2]);
- simdscalari vB2Hi = _simd_unpackhi_epi32(vB[2], vB[2]);
-
- simdscalari vA1B2Lo = _simd_mul_epi32(vA1Lo, vB2Lo); // 0 1 4 5
- simdscalari vA1B2Hi = _simd_mul_epi32(vA1Hi, vB2Hi); // 2 3 6 7
-
- // B1*A2
- simdscalari vA2Lo = _simd_unpacklo_epi32(vA[2], vA[2]);
- simdscalari vA2Hi = _simd_unpackhi_epi32(vA[2], vA[2]);
-
- simdscalari vB1Lo = _simd_unpacklo_epi32(vB[1], vB[1]);
- simdscalari vB1Hi = _simd_unpackhi_epi32(vB[1], vB[1]);
-
- simdscalari vA2B1Lo = _simd_mul_epi32(vA2Lo, vB1Lo);
- simdscalari vA2B1Hi = _simd_mul_epi32(vA2Hi, vB1Hi);
-
- // A1*B2 - A2*B1
- simdscalari detLo = _simd_sub_epi64(vA1B2Lo, vA2B1Lo);
- simdscalari detHi = _simd_sub_epi64(vA1B2Hi, vA2B1Hi);
-
- // The unpack step left the results interleaved across the two registers; the two
- // permutes below put them back into ascending lane order.
- // shuffle 0 1 4 5 2 3 6 7 -> 0 1 2 3
- simdscalari vResultLo = _simd_permute2f128_si(detLo, detHi, 0x20);
-
- // shuffle 0 1 4 5 2 3 6 7 -> 4 5 6 7
- simdscalari vResultHi = _simd_permute2f128_si(detLo, detHi, 0x31);
-
- pvDet[0] = vResultLo;
- pvDet[1] = vResultHi;
-}
-
-#if ENABLE_AVX512_SIMD16
-// simd16 overload: computes A1*B2 - A2*B1 for every lane of the inputs using 64-bit
-// products and a 64-bit subtract. Only vA[1], vA[2], vB[1] and vB[2] are read. Results are
-// written as 64-bit values to pvDet[0] (lanes 0-7) and pvDet[1] (lanes 8-F), per the lane
-// annotations below.
-// NOTE(review): no FIXED_POINT16_SCALE scaling or float conversion is applied here.
-INLINE
-void calcDeterminantIntVertical(const simd16scalari vA[3],
- const simd16scalari vB[3],
- simd16scalari* pvDet)
-{
- // refer to calcDeterminantInt comment for calculation explanation
-
- // A1*B2
- // Duplicate each 32-bit lane so every source lane lands in a 64-bit slot for the multiply.
- simd16scalari vA1_lo =
- _simd16_unpacklo_epi32(vA[1], vA[1]); // X 0 X 1 X 4 X 5 X 8 X 9 X C X D (32b)
- simd16scalari vA1_hi = _simd16_unpackhi_epi32(vA[1], vA[1]); // X 2 X 3 X 6 X 7 X A X B X E X F
-
- simd16scalari vB2_lo = _simd16_unpacklo_epi32(vB[2], vB[2]);
- simd16scalari vB2_hi = _simd16_unpackhi_epi32(vB[2], vB[2]);
-
- simd16scalari vA1B2_lo = _simd16_mul_epi32(vA1_lo, vB2_lo); // 0 1 4 5 8 9 C D (64b)
- simd16scalari vA1B2_hi = _simd16_mul_epi32(vA1_hi, vB2_hi); // 2 3 6 7 A B E F
-
- // B1*A2
- simd16scalari vA2_lo = _simd16_unpacklo_epi32(vA[2], vA[2]);
- simd16scalari vA2_hi = _simd16_unpackhi_epi32(vA[2], vA[2]);
-
- simd16scalari vB1_lo = _simd16_unpacklo_epi32(vB[1], vB[1]);
- simd16scalari vB1_hi = _simd16_unpackhi_epi32(vB[1], vB[1]);
-
- simd16scalari vA2B1_lo = _simd16_mul_epi32(vA2_lo, vB1_lo);
- simd16scalari vA2B1_hi = _simd16_mul_epi32(vA2_hi, vB1_hi);
-
- // A1*B2 - A2*B1
- simd16scalari difflo = _simd16_sub_epi64(vA1B2_lo, vA2B1_lo); // 0 1 4 5 8 9 C D (64b)
- simd16scalari diffhi = _simd16_sub_epi64(vA1B2_hi, vA2B1_hi); // 2 3 6 7 A B E F
-
- // Two permute stages restore ascending lane order: the first splits the results into
- // a low-half register and a high-half register, the second reorders within each.
- // (1, 0, 1, 0) = 01 00 01 00 = 0x44, (3, 2, 3, 2) = 11 10 11 10 = 0xEE
- simd16scalari templo = _simd16_permute2f128_si(difflo, diffhi, 0x44); // 0 1 4 5 2 3 6 7 (64b)
- simd16scalari temphi = _simd16_permute2f128_si(difflo, diffhi, 0xEE); // 8 9 C D A B E F
-
- // (3, 1, 2, 0) = 11 01 10 00 = 0xD8
- pvDet[0] = _simd16_permute2f128_si(templo, templo, 0xD8); // 0 1 2 3 4 5 6 7 (64b)
- pvDet[1] = _simd16_permute2f128_si(temphi, temphi, 0xD8); // 8 9 A B C D E F
-}
-
-#endif
-// Computes the C edge coefficient per lane as C = -Ax - By, evaluated as
-// ((A * x) * -1) - (B * y).
-INLINE
-void triangleSetupC(const __m128 vX, const __m128 vY, const __m128 vA, const __m128& vB, __m128& vC)
-{
-    // vC is written before vB is read, exactly as the statement order dictates; vB is
-    // a reference, so this ordering is kept in case a caller aliases vB and vC.
-    vC = _mm_mul_ps(vA, vX);
-
-    const __m128 vBy    = _mm_mul_ps(vB, vY);
-    const __m128 vNegAx = _mm_mul_ps(vC, _mm_set1_ps(-1.0f));
-
-    vC = _mm_sub_ps(vNegAx, vBy);
-}
-
-// Applies one viewport transform to the x, y and z components of NumVerts vertices,
-// in place: c' = c * scale + offset. Scale/offset come from element 0 of the m00/m30
-// (x), m11/m31 (y) and m22/m32 (z) arrays, broadcast to all lanes.
-template <uint32_t NumVerts>
-INLINE void viewportTransform(simdvector* v, const SWR_VIEWPORT_MATRICES& vpMatrices)
-{
-    const simdscalar scaleX  = _simd_load1_ps(&vpMatrices.m00[0]);
-    const simdscalar offsetX = _simd_load1_ps(&vpMatrices.m30[0]);
-    const simdscalar scaleY  = _simd_load1_ps(&vpMatrices.m11[0]);
-    const simdscalar offsetY = _simd_load1_ps(&vpMatrices.m31[0]);
-    const simdscalar scaleZ  = _simd_load1_ps(&vpMatrices.m22[0]);
-    const simdscalar offsetZ = _simd_load1_ps(&vpMatrices.m32[0]);
-
-    simdvector* const pEnd = v + NumVerts;
-    for (simdvector* pVert = v; pVert != pEnd; ++pVert)
-    {
-        pVert->x = _simd_fmadd_ps(pVert->x, scaleX, offsetX);
-        pVert->y = _simd_fmadd_ps(pVert->y, scaleY, offsetY);
-        pVert->z = _simd_fmadd_ps(pVert->z, scaleZ, offsetZ);
-    }
-}
-
-#if USE_SIMD16_FRONTEND
-// simd16 overload: applies one viewport transform to the x, y and z components of
-// NumVerts vertices, in place: c' = c * scale + offset. Scale/offset come from element 0
-// of the m00/m30 (x), m11/m31 (y) and m22/m32 (z) arrays, broadcast to all lanes.
-template <uint32_t NumVerts>
-INLINE void viewportTransform(simd16vector* v, const SWR_VIEWPORT_MATRICES& vpMatrices)
-{
-    const simd16scalar scaleX  = _simd16_broadcast_ss(&vpMatrices.m00[0]);
-    const simd16scalar offsetX = _simd16_broadcast_ss(&vpMatrices.m30[0]);
-    const simd16scalar scaleY  = _simd16_broadcast_ss(&vpMatrices.m11[0]);
-    const simd16scalar offsetY = _simd16_broadcast_ss(&vpMatrices.m31[0]);
-    const simd16scalar scaleZ  = _simd16_broadcast_ss(&vpMatrices.m22[0]);
-    const simd16scalar offsetZ = _simd16_broadcast_ss(&vpMatrices.m32[0]);
-
-    simd16vector* const pEnd = v + NumVerts;
-    for (simd16vector* pVert = v; pVert != pEnd; ++pVert)
-    {
-        pVert->x = _simd16_fmadd_ps(pVert->x, scaleX, offsetX);
-        pVert->y = _simd16_fmadd_ps(pVert->y, scaleY, offsetY);
-        pVert->z = _simd16_fmadd_ps(pVert->z, scaleZ, offsetZ);
-    }
-}
-
-#endif
-// Per-lane viewport variant: each lane gathers its own scale/offset from the matrix
-// arrays using the index in vViewportIdx (gather scale 4), then applies
-// c' = c * scale + offset to the x, y and z components of NumVerts vertices, in place.
-template <uint32_t NumVerts>
-INLINE void viewportTransform(simdvector*                  v,
-                              const SWR_VIEWPORT_MATRICES& vpMatrices,
-                              simdscalari const&           vViewportIdx)
-{
-    const simdscalar scaleX  = _simd_i32gather_ps(&vpMatrices.m00[0], vViewportIdx, 4);
-    const simdscalar offsetX = _simd_i32gather_ps(&vpMatrices.m30[0], vViewportIdx, 4);
-    const simdscalar scaleY  = _simd_i32gather_ps(&vpMatrices.m11[0], vViewportIdx, 4);
-    const simdscalar offsetY = _simd_i32gather_ps(&vpMatrices.m31[0], vViewportIdx, 4);
-    const simdscalar scaleZ  = _simd_i32gather_ps(&vpMatrices.m22[0], vViewportIdx, 4);
-    const simdscalar offsetZ = _simd_i32gather_ps(&vpMatrices.m32[0], vViewportIdx, 4);
-
-    simdvector* const pEnd = v + NumVerts;
-    for (simdvector* pVert = v; pVert != pEnd; ++pVert)
-    {
-        pVert->x = _simd_fmadd_ps(pVert->x, scaleX, offsetX);
-        pVert->y = _simd_fmadd_ps(pVert->y, scaleY, offsetY);
-        pVert->z = _simd_fmadd_ps(pVert->z, scaleZ, offsetZ);
-    }
-}
-
-#if USE_SIMD16_FRONTEND
-// simd16 per-lane viewport variant: each lane gathers its own scale/offset from the
-// matrix arrays using the index in vViewportIdx (gather scale 4), then applies
-// c' = c * scale + offset to the x, y and z components of NumVerts vertices, in place.
-template <uint32_t NumVerts>
-INLINE void viewportTransform(simd16vector*                v,
-                              const SWR_VIEWPORT_MATRICES& vpMatrices,
-                              simd16scalari const&         vViewportIdx)
-{
-    const simd16scalar scaleX  = _simd16_i32gather_ps(&vpMatrices.m00[0], vViewportIdx, 4);
-    const simd16scalar offsetX = _simd16_i32gather_ps(&vpMatrices.m30[0], vViewportIdx, 4);
-    const simd16scalar scaleY  = _simd16_i32gather_ps(&vpMatrices.m11[0], vViewportIdx, 4);
-    const simd16scalar offsetY = _simd16_i32gather_ps(&vpMatrices.m31[0], vViewportIdx, 4);
-    const simd16scalar scaleZ  = _simd16_i32gather_ps(&vpMatrices.m22[0], vViewportIdx, 4);
-    const simd16scalar offsetZ = _simd16_i32gather_ps(&vpMatrices.m32[0], vViewportIdx, 4);
-
-    simd16vector* const pEnd = v + NumVerts;
-    for (simd16vector* pVert = v; pVert != pEnd; ++pVert)
-    {
-        pVert->x = _simd16_fmadd_ps(pVert->x, scaleX, offsetX);
-        pVert->y = _simd16_fmadd_ps(pVert->y, scaleY, offsetY);
-        pVert->z = _simd16_fmadd_ps(pVert->z, scaleZ, offsetZ);
-    }
-}
-
-#endif
-// Computes the integer bounding box of the three vertices held in lanes 0..2 of vX / vY
-// and stores it in bbox (xmin, xmax, ymin, ymax). Lane 3 is never routed into lane 0,
-// so it does not affect the result.
-INLINE
-void calcBoundingBoxInt(const __m128i& vX, const __m128i& vY, SWR_RECT& bbox)
-{
-    // Horizontal min/max: build two permutations that bring vertex 1 and vertex 2 into
-    // lane 0, so a lane-wise min/max over (v, swap01, swap02) leaves the extreme of all
-    // three vertices in lane 0.
-    const __m128i vXSwap01 = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 2, 0, 1));
-    const __m128i vXSwap02 = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 0, 1, 2));
-    const __m128i vYSwap01 = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 2, 0, 1));
-    const __m128i vYSwap02 = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 0, 1, 2));
-
-    const __m128i vMinX = _mm_min_epi32(_mm_min_epi32(vX, vXSwap01), vXSwap02);
-    const __m128i vMaxX = _mm_max_epi32(_mm_max_epi32(vX, vXSwap01), vXSwap02);
-    const __m128i vMinY = _mm_min_epi32(_mm_min_epi32(vY, vYSwap01), vYSwap02);
-    const __m128i vMaxY = _mm_max_epi32(_mm_max_epi32(vY, vYSwap01), vYSwap02);
-
-    // Lane 0 of each register now holds the reduced value.
-    bbox.xmin = _mm_extract_epi32(vMinX, 0);
-    bbox.xmax = _mm_extract_epi32(vMaxX, 0);
-    bbox.ymin = _mm_extract_epi32(vMinY, 0);
-    bbox.ymax = _mm_extract_epi32(vMaxY, 0);
-}
-
-// Returns true only when the draw's state is single-sampled (SWR_MULTISAMPLE_1X), has a
-// point size of exactly 1.0, and has point parameters, point sprites and the clip
-// distance mask all disabled/zero.
-INLINE
-bool CanUseSimplePoints(DRAW_CONTEXT* pDC)
-{
-    const API_STATE& apiState = GetApiState(pDC);
-
-    if (apiState.rastState.sampleCount != SWR_MULTISAMPLE_1X)
-    {
-        return false;
-    }
-
-    if (apiState.rastState.pointSize != 1.0f)
-    {
-        return false;
-    }
-
-    if (apiState.rastState.pointParam || apiState.rastState.pointSpriteEnable)
-    {
-        return false;
-    }
-
-    return !apiState.backendState.clipDistanceMask;
-}
-
-// True when at least one of the four float lanes of vec is NaN: the unordered
-// self-compare marks exactly the lanes holding NaN, and any set lane yields a
-// non-zero move-mask.
-INLINE
-bool vHasNaN(const __m128& vec)
-{
-    return _mm_movemask_ps(_mm_cmpunord_ps(vec, vec)) != 0;
-}
-
-uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numElements);
-uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts);
-
-// ProcessDraw front-end function. All combinations of parameter values are available
-PFN_FE_WORK_FUNC GetProcessDrawFunc(bool IsIndexed,
- bool IsCutIndexEnabled,
- bool HasTessellation,
- bool HasGeometryShader,
- bool HasStreamOut,
- bool HasRasterization);
-
-void ProcessClear(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData);
-void ProcessStoreTiles(SWR_CONTEXT* pContext,
- DRAW_CONTEXT* pDC,
- uint32_t workerId,
- void* pUserData);
-void ProcessDiscardInvalidateTiles(SWR_CONTEXT* pContext,
- DRAW_CONTEXT* pDC,
- uint32_t workerId,
- void* pUserData);
-void ProcessSync(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData);
-void ProcessShutdown(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData);
-
-PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative);
-#if USE_SIMD16_FRONTEND
-PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative);
-#endif
-
-// NOTE(review): the forward declaration names PA_STATE_BASE, but the prototypes below take
-// PA_STATE&, so PA_STATE must already be declared by an earlier include -- confirm.
-struct PA_STATE_BASE; // forward decl
-// Point and line binning entry points. Only declared here; the definitions are not in
-// this header. All variants share the same parameter list shape.
-void BinPoints(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simdvector prims[3],
- uint32_t primMask,
- simdscalari const& primID,
- simdscalari const& viewportIdx,
- simdscalari const& rtIdx);
-void BinLines(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simdvector prims[3],
- uint32_t primMask,
- simdscalari const& primID,
- simdscalari const& viewportIdx,
- simdscalari const& rtIdx);
-// simd16 variants (simd16vector / simd16scalari arguments, SIMDCALL calling convention),
-// declared only when USE_SIMD16_FRONTEND is enabled.
-#if USE_SIMD16_FRONTEND
-void SIMDCALL BinPoints_simd16(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simd16vector prims[3],
- uint32_t primMask,
- simd16scalari const& primID,
- simd16scalari const& viewportIdx,
- simd16scalari const& rtIdx);
-void SIMDCALL BinLines_simd16(DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- simd16vector prims[3],
- uint32_t primMask,
- simd16scalari const& primID,
- simd16scalari const& viewportIdx,
- simd16scalari const& rtIdx);
-#endif
-
diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h b/src/gallium/drivers/swr/rasterizer/core/knobs.h
deleted file mode 100644
index 798e5684025..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/knobs.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file knobs.h
- *
- * @brief Static (Compile-Time) Knobs for Core.
- *
- ******************************************************************************/
-#pragma once
-
-#include <stdint.h>
-#include <gen_knobs.h>
-
-#define KNOB_ARCH_AVX 0
-#define KNOB_ARCH_AVX2 1
-#define KNOB_ARCH_AVX512 2
-
-///////////////////////////////////////////////////////////////////////////////
-// AVX512 Support
-///////////////////////////////////////////////////////////////////////////////
-
-#define ENABLE_AVX512_SIMD16 1
-#define USE_SIMD16_FRONTEND 1
-#define USE_SIMD16_SHADERS 1 // requires USE_SIMD16_FRONTEND
-#define USE_SIMD16_VS 1 // requires USE_SIMD16_SHADERS
-
-///////////////////////////////////////////////////////////////////////////////
-// Architecture validation
-///////////////////////////////////////////////////////////////////////////////
-#if !defined(KNOB_ARCH)
-#define KNOB_ARCH KNOB_ARCH_AVX
-#endif
-
-#if (KNOB_ARCH == KNOB_ARCH_AVX)
-#define KNOB_ARCH_ISA AVX
-#define KNOB_ARCH_STR "AVX"
-#elif (KNOB_ARCH == KNOB_ARCH_AVX2)
-#define KNOB_ARCH_ISA AVX2
-#define KNOB_ARCH_STR "AVX2"
-#elif (KNOB_ARCH == KNOB_ARCH_AVX512)
-#define KNOB_ARCH_ISA AVX512F
-#define KNOB_ARCH_STR "AVX512"
-#else
-#error "Unknown architecture"
-#endif
-
-#define KNOB_SIMD_WIDTH 8
-#define KNOB_SIMD_BYTES 32
-
-#define KNOB_SIMD16_WIDTH 16
-#define KNOB_SIMD16_BYTES 64
-
-#define MAX_KNOB_ARCH_STR_LEN sizeof("AVX512_PLUS_PADDING")
-
-///////////////////////////////////////////////////////////////////////////////
-// Configuration knobs
-///////////////////////////////////////////////////////////////////////////////
-// Maximum supported number of active vertex buffer streams
-#define KNOB_NUM_STREAMS 32
-
-// Maximum supported active viewports and scissors
-#define KNOB_NUM_VIEWPORTS_SCISSORS 16
-
-// Guardband range used by the clipper
-#define KNOB_GUARDBAND_WIDTH 32768.0f
-#define KNOB_GUARDBAND_HEIGHT 32768.0f
-
-// Scratch space requirements per worker. Currently only used for TGSM sizing for some stages
-#define KNOB_WORKER_SCRATCH_SPACE_SIZE (32 * 1024)
-
-///////////////////////////////
-// Macro tile configuration
-///////////////////////////////
-
-// raster tile dimensions
-#define KNOB_TILE_X_DIM 8
-#define KNOB_TILE_X_DIM_SHIFT 3
-#define KNOB_TILE_Y_DIM 8
-#define KNOB_TILE_Y_DIM_SHIFT 3
-
-// fixed macrotile pixel dimension for now, eventually will be
-// dynamically set based on tile format and pixel size
-#define KNOB_MACROTILE_X_DIM 32
-#define KNOB_MACROTILE_Y_DIM 32
-#define KNOB_MACROTILE_X_DIM_FIXED_SHIFT 13
-#define KNOB_MACROTILE_Y_DIM_FIXED_SHIFT 13
-#define KNOB_MACROTILE_X_DIM_FIXED (KNOB_MACROTILE_X_DIM << 8)
-#define KNOB_MACROTILE_Y_DIM_FIXED (KNOB_MACROTILE_Y_DIM << 8)
-#define KNOB_MACROTILE_X_DIM_IN_TILES (KNOB_MACROTILE_X_DIM >> KNOB_TILE_X_DIM_SHIFT)
-#define KNOB_MACROTILE_Y_DIM_IN_TILES (KNOB_MACROTILE_Y_DIM >> KNOB_TILE_Y_DIM_SHIFT)
-
-// total # of hot tiles available. This should be enough to
-// fully render a 16kx16k 128bpp render target
-#define KNOB_NUM_HOT_TILES_X 512
-#define KNOB_NUM_HOT_TILES_Y 512
-#define KNOB_COLOR_HOT_TILE_FORMAT R32G32B32A32_FLOAT
-#define KNOB_DEPTH_HOT_TILE_FORMAT R32_FLOAT
-#define KNOB_STENCIL_HOT_TILE_FORMAT R8_UINT
-
-// Max scissor rectangle
-#define KNOB_MAX_SCISSOR_X KNOB_NUM_HOT_TILES_X* KNOB_MACROTILE_X_DIM
-#define KNOB_MAX_SCISSOR_Y KNOB_NUM_HOT_TILES_Y* KNOB_MACROTILE_Y_DIM
-
-#if KNOB_SIMD_WIDTH == 8 && KNOB_TILE_X_DIM < 4
-#error "incompatible width/tile dimensions"
-#endif
-
-#if ENABLE_AVX512_SIMD16
-#if KNOB_SIMD16_WIDTH == 16 && KNOB_TILE_X_DIM < 8
-#error "incompatible width/tile dimensions"
-#endif
-#endif
-
-#if KNOB_SIMD_WIDTH == 8
-#define SIMD_TILE_X_DIM 4
-#define SIMD_TILE_Y_DIM 2
-#else
-#error "Invalid simd width"
-#endif
-
-#if ENABLE_AVX512_SIMD16
-#if KNOB_SIMD16_WIDTH == 16
-#define SIMD16_TILE_X_DIM 8
-#define SIMD16_TILE_Y_DIM 2
-#else
-#error "Invalid simd width"
-#endif
-#endif
-
-///////////////////////////////////////////////////////////////////////////////
-// Optimization knobs
-///////////////////////////////////////////////////////////////////////////////
-#define KNOB_USE_FAST_SRGB TRUE
-
-// enables cut-aware primitive assembler
-#define KNOB_ENABLE_CUT_AWARE_PA TRUE
-
-// enables early rasterization (useful for small triangles)
-#if !defined(KNOB_ENABLE_EARLY_RAST)
-#define KNOB_ENABLE_EARLY_RAST 1
-#endif
-
-#if KNOB_ENABLE_EARLY_RAST
-#define ER_SIMD_TILE_X_SHIFT 2
-#define ER_SIMD_TILE_Y_SHIFT 2
-#endif
-
-///////////////////////////////////////////////////////////////////////////////
-// Debug knobs
-///////////////////////////////////////////////////////////////////////////////
-//#define KNOB_ENABLE_RDTSC
-
-// Set to 1 to use the dynamic KNOB_TOSS_XXXX knobs.
-#if !defined(KNOB_ENABLE_TOSS_POINTS)
-#define KNOB_ENABLE_TOSS_POINTS 0
-#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs_init.h b/src/gallium/drivers/swr/rasterizer/core/knobs_init.h
deleted file mode 100644
index f8797a8f2bc..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/knobs_init.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file knobs_init.h
- *
- * @brief Dynamic Knobs Initialization for Core.
- *
- ******************************************************************************/
-#pragma once
-
-#include <core/knobs.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <stdio.h>
-
-// Assume the type is compatible with a 32-bit integer
-template <typename T>
-static inline void ConvertEnvToKnob(const char* pOverride, T& knobValue)
-{
- uint32_t value = 0;
- char* pStopped = nullptr;
- value = strtoul(pOverride, &pStopped, 0);
- if (pStopped != pOverride)
- {
- knobValue = static_cast<T>(value);
- }
-}
-
-static inline void ConvertEnvToKnob(const char* pOverride, bool& knobValue)
-{
- size_t len = strlen(pOverride);
- if (len == 1)
- {
- auto c = tolower(pOverride[0]);
- if (c == 'y' || c == 't' || c == '1')
- {
- knobValue = true;
- return;
- }
- if (c == 'n' || c == 'f' || c == '0')
- {
- knobValue = false;
- return;
- }
- }
-
- // Try converting to a number and casting to bool
- uint32_t value = 0;
- char* pStopped = nullptr;
- value = strtoul(pOverride, &pStopped, 0);
- if (pStopped != pOverride)
- {
- knobValue = value != 0;
- }
-}
-
-static inline void ConvertEnvToKnob(const char* pOverride, float& knobValue)
-{
- float value = knobValue;
- if (sscanf(pOverride, "%f", &value))
- {
- knobValue = value;
- }
-}
-
-static inline void ConvertEnvToKnob(const char* pOverride, std::string& knobValue)
-{
- knobValue = pOverride;
-}
-
-template <typename T>
-static inline void InitKnob(T& knob)
-{
- // Read environment variables
- const char* pOverride = getenv(knob.Name());
-
- if (pOverride)
- {
- auto knobValue = knob.DefaultValue();
- ConvertEnvToKnob(pOverride, knobValue);
- knob.Value(knobValue);
- }
- else
- {
- // Set default value
- knob.Value(knob.DefaultValue());
- }
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/multisample.h b/src/gallium/drivers/swr/rasterizer/core/multisample.h
deleted file mode 100644
index 3b23974a7f4..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/multisample.h
+++ /dev/null
@@ -1,459 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file multisample.h
- *
- ******************************************************************************/
-
-#pragma once
-
-#include "context.h"
-#include "format_traits.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief convenience typedef for testing for single sample case
-typedef std::integral_constant<int, 1> SingleSampleT;
-
-INLINE
-SWR_MULTISAMPLE_COUNT GetSampleCount(uint32_t numSamples)
-{
- switch (numSamples)
- {
- case 1:
- return SWR_MULTISAMPLE_1X;
- case 2:
- return SWR_MULTISAMPLE_2X;
- case 4:
- return SWR_MULTISAMPLE_4X;
- case 8:
- return SWR_MULTISAMPLE_8X;
- case 16:
- return SWR_MULTISAMPLE_16X;
- default:
- assert(0);
- return SWR_MULTISAMPLE_1X;
- }
-}
-
-// hardcoded offsets based on Direct3d standard multisample positions
-// 8 x 8 pixel grid ranging from (0, 0) to (15, 15), with (0, 0) = UL pixel corner
-// coords are 0.8 fixed point offsets from (0, 0)
-template <SWR_MULTISAMPLE_COUNT sampleCount, bool isCenter = false>
-struct MultisampleTraits
-{
- INLINE static float X(uint32_t sampleNum) = delete;
- INLINE static float Y(uint32_t sampleNum) = delete;
- INLINE static simdscalari FullSampleMask() = delete;
-
- static const uint32_t numSamples = 0;
-};
-
-template <>
-struct MultisampleTraits<SWR_MULTISAMPLE_1X, false>
-{
- INLINE static float X(uint32_t sampleNum) { return samplePosX[sampleNum]; };
- INLINE static float Y(uint32_t sampleNum) { return samplePosY[sampleNum]; };
- INLINE static simdscalari FullSampleMask() { return _simd_set1_epi32(0x1); };
-
- static const uint32_t numSamples = 1;
- static const uint32_t numCoverageSamples = 1;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_1X;
- static constexpr uint32_t samplePosXi[1] = {0x80};
- static constexpr uint32_t samplePosYi[1] = {0x80};
- static constexpr float samplePosX[1] = {0.5f};
- static constexpr float samplePosY[1] = {0.5f};
-};
-
-template <>
-struct MultisampleTraits<SWR_MULTISAMPLE_1X, true>
-{
- INLINE static float X(uint32_t sampleNum) { return 0.5f; };
- INLINE static float Y(uint32_t sampleNum) { return 0.5f; };
- INLINE static simdscalari FullSampleMask() { return _simd_set1_epi32(0x1); };
-
- static const uint32_t numSamples = 1;
- static const uint32_t numCoverageSamples = 1;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_1X;
- static constexpr uint32_t samplePosXi[1] = {0x80};
- static constexpr uint32_t samplePosYi[1] = {0x80};
- static constexpr float samplePosX[1] = {0.5f};
- static constexpr float samplePosY[1] = {0.5f};
-};
-
-template <>
-struct MultisampleTraits<SWR_MULTISAMPLE_2X, false>
-{
- INLINE static float X(uint32_t sampleNum)
- {
- SWR_ASSERT(sampleNum < numSamples);
- return samplePosX[sampleNum];
- };
- INLINE static float Y(uint32_t sampleNum)
- {
- SWR_ASSERT(sampleNum < numSamples);
- return samplePosY[sampleNum];
- };
- INLINE static simdscalari FullSampleMask()
- {
- static const simdscalari mask = _simd_set1_epi32(0x3);
- return mask;
- }
-
- static const uint32_t numSamples = 2;
- static const uint32_t numCoverageSamples = 2;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_2X;
- static constexpr uint32_t samplePosXi[2] = {0xC0, 0x40};
- static constexpr uint32_t samplePosYi[2] = {0xC0, 0x40};
- static constexpr float samplePosX[2] = {0.75f, 0.25f};
- static constexpr float samplePosY[2] = {0.75f, 0.25f};
-};
-
-template <>
-struct MultisampleTraits<SWR_MULTISAMPLE_2X, true>
-{
- INLINE static float X(uint32_t sampleNum) { return 0.5f; };
- INLINE static float Y(uint32_t sampleNum) { return 0.5f; };
- INLINE static simdscalari FullSampleMask()
- {
- static const simdscalari mask = _simd_set1_epi32(0x3);
- return mask;
- }
- static const uint32_t numSamples = 2;
- static const uint32_t numCoverageSamples = 1;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_2X;
- static constexpr uint32_t samplePosXi[2] = {0x80, 0x80};
- static constexpr uint32_t samplePosYi[2] = {0x80, 0x80};
- static constexpr float samplePosX[2] = {0.5f, 0.5f};
- static constexpr float samplePosY[2] = {0.5f, 0.5f};
-};
-
-template <>
-struct MultisampleTraits<SWR_MULTISAMPLE_4X, false>
-{
- INLINE static float X(uint32_t sampleNum)
- {
- SWR_ASSERT(sampleNum < numSamples);
- return samplePosX[sampleNum];
- };
- INLINE static float Y(uint32_t sampleNum)
- {
- SWR_ASSERT(sampleNum < numSamples);
- return samplePosY[sampleNum];
- };
- INLINE static simdscalari FullSampleMask()
- {
- static const simdscalari mask = _simd_set1_epi32(0xF);
- return mask;
- }
-
- static const uint32_t numSamples = 4;
- static const uint32_t numCoverageSamples = 4;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_4X;
- static constexpr uint32_t samplePosXi[4] = {0x60, 0xE0, 0x20, 0xA0};
- static constexpr uint32_t samplePosYi[4] = {0x20, 0x60, 0xA0, 0xE0};
- static constexpr float samplePosX[4] = {0.375f, 0.875f, 0.125f, 0.625f};
- static constexpr float samplePosY[4] = {0.125f, 0.375f, 0.625f, 0.875f};
-};
-
-template <>
-struct MultisampleTraits<SWR_MULTISAMPLE_4X, true>
-{
- INLINE static float X(uint32_t sampleNum) { return 0.5f; };
- INLINE static float Y(uint32_t sampleNum) { return 0.5f; };
- INLINE static simdscalari FullSampleMask()
- {
- static const simdscalari mask = _simd_set1_epi32(0xF);
- return mask;
- }
-
- static const uint32_t numSamples = 4;
- static const uint32_t numCoverageSamples = 1;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_4X;
- static constexpr uint32_t samplePosXi[4] = {0x80, 0x80, 0x80, 0x80};
- static constexpr uint32_t samplePosYi[4] = {0x80, 0x80, 0x80, 0x80};
- static constexpr float samplePosX[4] = {0.5f, 0.5f, 0.5f, 0.5f};
- static constexpr float samplePosY[4] = {0.5f, 0.5f, 0.5f, 0.5f};
-};
-
-template <>
-struct MultisampleTraits<SWR_MULTISAMPLE_8X, false>
-{
- INLINE static float X(uint32_t sampleNum)
- {
- SWR_ASSERT(sampleNum < numSamples);
- return samplePosX[sampleNum];
- };
- INLINE static float Y(uint32_t sampleNum)
- {
- SWR_ASSERT(sampleNum < numSamples);
- return samplePosY[sampleNum];
- };
- INLINE static simdscalari FullSampleMask()
- {
- static const simdscalari mask = _simd_set1_epi32(0xFF);
- return mask;
- }
-
- static const uint32_t numSamples = 8;
- static const uint32_t numCoverageSamples = 8;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_8X;
- static constexpr uint32_t samplePosXi[8] = {0x90, 0x70, 0xD0, 0x50, 0x30, 0x10, 0xB0, 0xF0};
- static constexpr uint32_t samplePosYi[8] = {0x50, 0xB0, 0x90, 0x30, 0xD0, 0x70, 0xF0, 0x10};
- static constexpr float samplePosX[8] = {
- 0.5625f, 0.4375f, 0.8125f, 0.3125f, 0.1875f, 0.0625f, 0.6875f, 0.9375f};
- static constexpr float samplePosY[8] = {
- 0.3125f, 0.6875f, 0.5625f, 0.1875f, 0.8125f, 0.4375f, 0.9375f, 0.0625f};
-};
-
-template <>
-struct MultisampleTraits<SWR_MULTISAMPLE_8X, true>
-{
- INLINE static float X(uint32_t sampleNum) { return 0.5f; };
- INLINE static float Y(uint32_t sampleNum) { return 0.5f; };
- INLINE static simdscalari FullSampleMask()
- {
- static const simdscalari mask = _simd_set1_epi32(0xFF);
- return mask;
- }
- static const uint32_t numSamples = 8;
- static const uint32_t numCoverageSamples = 1;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_8X;
- static constexpr uint32_t samplePosXi[8] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
- static constexpr uint32_t samplePosYi[8] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
- static constexpr float samplePosX[8] = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
- static constexpr float samplePosY[8] = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
-};
-
-template <>
-struct MultisampleTraits<SWR_MULTISAMPLE_16X, false>
-{
- INLINE static float X(uint32_t sampleNum)
- {
- SWR_ASSERT(sampleNum < numSamples);
- return samplePosX[sampleNum];
- };
- INLINE static float Y(uint32_t sampleNum)
- {
- SWR_ASSERT(sampleNum < numSamples);
- return samplePosY[sampleNum];
- };
- INLINE static simdscalari FullSampleMask()
- {
- static const simdscalari mask = _simd_set1_epi32(0xFFFF);
- return mask;
- }
-
- static const uint32_t numSamples = 16;
- static const uint32_t numCoverageSamples = 16;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
- static constexpr uint32_t samplePosXi[16] = {0x90,
- 0x70,
- 0x50,
- 0xC0,
- 0x30,
- 0xA0,
- 0xD0,
- 0xB0,
- 0x60,
- 0x80,
- 0x40,
- 0x20,
- 0x00,
- 0xF0,
- 0xE0,
- 0x10};
- static constexpr uint32_t samplePosYi[16] = {0x90,
- 0x50,
- 0xA0,
- 0x70,
- 0x60,
- 0xD0,
- 0xB0,
- 0x30,
- 0xE0,
- 0x10,
- 0x20,
- 0xC0,
- 0x80,
- 0x40,
- 0xF0,
- 0x00};
- static constexpr float samplePosX[16] = {0.5625f,
- 0.4375f,
- 0.3125f,
- 0.7500f,
- 0.1875f,
- 0.6250f,
- 0.8125f,
- 0.6875f,
- 0.3750f,
- 0.5000f,
- 0.2500f,
- 0.1250f,
- 0.0000f,
- 0.9375f,
- 0.8750f,
- 0.0625f};
- static constexpr float samplePosY[16] = {0.5625f,
- 0.3125f,
- 0.6250f,
- 0.4375f,
- 0.3750f,
- 0.8125f,
- 0.6875f,
- 0.1875f,
- 0.8750f,
- 0.0625f,
- 0.1250f,
- 0.7500f,
- 0.5000f,
- 0.2500f,
- 0.9375f,
- 0.0000f};
-};
-
-template <>
-struct MultisampleTraits<SWR_MULTISAMPLE_16X, true>
-{
- INLINE static float X(uint32_t sampleNum) { return 0.5f; };
- INLINE static float Y(uint32_t sampleNum) { return 0.5f; };
- INLINE static simdscalari FullSampleMask()
- {
- static const simdscalari mask = _simd_set1_epi32(0xFFFF);
- return mask;
- }
- static const uint32_t numSamples = 16;
- static const uint32_t numCoverageSamples = 1;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
- static constexpr uint32_t samplePosXi[16] = {0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80};
- static constexpr uint32_t samplePosYi[16] = {0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80,
- 0x80};
- static constexpr float samplePosX[16] = {0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f};
- static constexpr float samplePosY[16] = {0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f,
- 0.5f};
-};
-
-INLINE
-bool isNonStandardPattern(const SWR_MULTISAMPLE_COUNT sampleCount,
- const SWR_MULTISAMPLE_POS& samplePos)
-{
- // detect if we're using standard or center sample patterns
- const uint32_t *standardPosX, *standardPosY;
- switch (sampleCount)
- {
- case SWR_MULTISAMPLE_1X:
- standardPosX = MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosXi;
- standardPosY = MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosYi;
- break;
- case SWR_MULTISAMPLE_2X:
- standardPosX = MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosXi;
- standardPosY = MultisampleTraits<SWR_MULTISAMPLE_2X>::samplePosYi;
- break;
- case SWR_MULTISAMPLE_4X:
- standardPosX = MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosXi;
- standardPosY = MultisampleTraits<SWR_MULTISAMPLE_4X>::samplePosYi;
- break;
- case SWR_MULTISAMPLE_8X:
- standardPosX = MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosXi;
- standardPosY = MultisampleTraits<SWR_MULTISAMPLE_8X>::samplePosYi;
- break;
- case SWR_MULTISAMPLE_16X:
- standardPosX = MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosXi;
- standardPosY = MultisampleTraits<SWR_MULTISAMPLE_16X>::samplePosYi;
- break;
- default:
- break;
- }
-
- // scan sample pattern for standard or center
- uint32_t numSamples = GetNumSamples(sampleCount);
- bool bIsStandard = true;
- if (numSamples > 1)
- {
- for (uint32_t i = 0; i < numSamples; i++)
- {
- bIsStandard =
- (standardPosX[i] == samplePos.Xi(i)) || (standardPosY[i] == samplePos.Yi(i));
- if (!bIsStandard)
- break;
- }
- }
- return !bIsStandard;
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h
deleted file mode 100644
index adfc1414bae..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/pa.h
+++ /dev/null
@@ -1,1676 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file pa.h
- *
- * @brief Definitions for primitive assembly.
- * N primitives are assembled at a time, where N is the SIMD width.
- * A state machine, that is specific for a given topology, drives the
- * assembly of vertices into triangles.
- *
- ******************************************************************************/
-#pragma once
-
-#include "frontend.h"
-
-struct PA_STATE
-{
-#if USE_SIMD16_FRONTEND
- enum
- {
- SIMD_WIDTH = KNOB_SIMD16_WIDTH,
- SIMD_WIDTH_DIV2 = KNOB_SIMD16_WIDTH / 2,
- SIMD_WIDTH_LOG2 = 4
- };
-
- typedef simd16mask SIMDMASK;
-
- typedef simd16scalar SIMDSCALAR;
- typedef simd16vector SIMDVECTOR;
- typedef simd16vertex SIMDVERTEX;
-
- typedef simd16scalari SIMDSCALARI;
-
-#else
- enum
- {
- SIMD_WIDTH = KNOB_SIMD_WIDTH,
- SIMD_WIDTH_DIV2 = KNOB_SIMD_WIDTH / 2,
- SIMD_WIDTH_LOG2 = 3
- };
-
- typedef simdmask SIMDMASK;
-
- typedef simdscalar SIMDSCALAR;
- typedef simdvector SIMDVECTOR;
- typedef simdvertex SIMDVERTEX;
-
- typedef simdscalari SIMDSCALARI;
-
-#endif
- DRAW_CONTEXT* pDC{nullptr}; // draw context
- uint8_t* pStreamBase{nullptr}; // vertex stream
- uint32_t streamSizeInVerts{0}; // total size of the input stream in verts
- uint32_t vertexStride{0}; // stride of a vertex in simdvector units
-
- // The topology the binner will use. In some cases the FE changes the topology from the api
- // state.
- PRIMITIVE_TOPOLOGY binTopology{TOP_UNKNOWN};
-
-#if ENABLE_AVX512_SIMD16
- bool useAlternateOffset{false};
-#endif
-
- bool viewportArrayActive{false};
- bool rtArrayActive{false};
- uint32_t numVertsPerPrim{0};
-
- PA_STATE() {}
- PA_STATE(DRAW_CONTEXT* in_pDC,
- uint8_t* in_pStreamBase,
- uint32_t in_streamSizeInVerts,
- uint32_t in_vertexStride,
- uint32_t in_numVertsPerPrim) :
- pDC(in_pDC),
- pStreamBase(in_pStreamBase), streamSizeInVerts(in_streamSizeInVerts),
- vertexStride(in_vertexStride), numVertsPerPrim(in_numVertsPerPrim)
- {
- }
-
- virtual bool HasWork() = 0;
- virtual simdvector& GetSimdVector(uint32_t index, uint32_t slot) = 0;
-#if ENABLE_AVX512_SIMD16
- virtual simd16vector& GetSimdVector_simd16(uint32_t index, uint32_t slot) = 0;
-#endif
- virtual bool Assemble(uint32_t slot, simdvector verts[]) = 0;
-#if ENABLE_AVX512_SIMD16
- virtual bool Assemble(uint32_t slot, simd16vector verts[]) = 0;
-#endif
- virtual void AssembleSingle(uint32_t slot, uint32_t primIndex, simd4scalar verts[]) = 0;
- virtual bool NextPrim() = 0;
- virtual SIMDVERTEX& GetNextVsOutput() = 0;
- virtual bool GetNextStreamOutput() = 0;
- virtual SIMDMASK& GetNextVsIndices() = 0;
- virtual uint32_t NumPrims() = 0;
- virtual void Reset() = 0;
- virtual SIMDSCALARI GetPrimID(uint32_t startID) = 0;
-};
-
-// The Optimized PA is a state machine that assembles triangles from vertex shader simd
-// output. Here is the sequence
-// 1. Execute FS/VS to generate a simd vertex (4 vertices for SSE simd and 8 for AVX simd).
-// 2. Execute PA function to assemble and bin triangles.
-// a. The PA function is a set of functions that collectively make up the
-// state machine for a given topology.
-// 1. We use a state index to track which PA function to call.
-// b. Often the PA function needs to 2 simd vertices in order to assemble the next triangle.
-// 1. We call this the current and previous simd vertex.
-// 2. The SSE simd is 4-wide which is not a multiple of 3 needed for triangles. In
-// order to assemble the second triangle, for a triangle list, we'll need the
-// last vertex from the previous simd and the first 2 vertices from the current
-// simd.
-// 3. At times the PA can assemble multiple triangles from the 2 simd vertices.
-//
-// This optimized PA is not cut aware, so only should be used by non-indexed draws or draws without
-// cuts
-struct PA_STATE_OPT : public PA_STATE
-{
- uint32_t numPrims{0}; // Total number of primitives for draw.
- uint32_t numPrimsComplete{0}; // Total number of complete primitives.
-
- uint32_t numSimdPrims{0}; // Number of prims in current simd.
-
- uint32_t cur{0}; // index to current VS output.
- uint32_t prev{0}; // index to prev VS output. Not really needed in the state.
- const uint32_t first{0}; // index to first VS output. Used for tri fan and line loop.
-
- uint32_t counter{0}; // state counter
- bool reset{false}; // reset state
-
- uint32_t primIDIncr{0}; // how much to increment for each vector (typically vector / {1, 2})
- SIMDSCALARI primID;
-
- typedef bool (*PFN_PA_FUNC)(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-#if ENABLE_AVX512_SIMD16
- typedef bool (*PFN_PA_FUNC_SIMD16)(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-#endif
- typedef void (*PFN_PA_SINGLE_FUNC)(PA_STATE_OPT& pa,
- uint32_t slot,
- uint32_t primIndex,
- simd4scalar verts[]);
-
- PFN_PA_FUNC pfnPaFunc{nullptr}; // PA state machine function for assembling 4 triangles.
-#if ENABLE_AVX512_SIMD16
- PFN_PA_FUNC_SIMD16 pfnPaFunc_simd16{nullptr};
-#endif
- PFN_PA_SINGLE_FUNC pfnPaSingleFunc{
- nullptr}; // PA state machine function for assembling single triangle.
- PFN_PA_FUNC pfnPaFuncReset{nullptr}; // initial state to set on reset
-#if ENABLE_AVX512_SIMD16
- PFN_PA_FUNC_SIMD16 pfnPaFuncReset_simd16{nullptr};
-#endif
-
- // state used to advance the PA when Next is called
- PFN_PA_FUNC pfnPaNextFunc{nullptr};
-#if ENABLE_AVX512_SIMD16
- PFN_PA_FUNC_SIMD16 pfnPaNextFunc_simd16{nullptr};
-#endif
- uint32_t nextNumSimdPrims{0};
- uint32_t nextNumPrimsIncrement{0};
- bool nextReset{false};
- bool isStreaming{false};
-
- SIMDMASK junkIndices{0}; // temporary index store for unused virtual function
-
- PA_STATE_OPT() {}
- PA_STATE_OPT(DRAW_CONTEXT* pDC,
- uint32_t numPrims,
- uint8_t* pStream,
- uint32_t streamSizeInVerts,
- uint32_t vertexStride,
- bool in_isStreaming,
- uint32_t numVertsPerPrim,
- PRIMITIVE_TOPOLOGY topo = TOP_UNKNOWN);
-
- bool HasWork() { return (this->numPrimsComplete < this->numPrims) ? true : false; }
-
- simdvector& GetSimdVector(uint32_t index, uint32_t slot)
- {
- SWR_ASSERT(slot < vertexStride);
- uint32_t offset = index * vertexStride + slot;
- simdvector& vertexSlot = ((simdvector*)pStreamBase)[offset];
- return vertexSlot;
- }
-
-#if ENABLE_AVX512_SIMD16
- simd16vector& GetSimdVector_simd16(uint32_t index, uint32_t slot)
- {
- SWR_ASSERT(slot < vertexStride);
- uint32_t offset = index * vertexStride + slot;
- simd16vector& vertexSlot = ((simd16vector*)pStreamBase)[offset];
- return vertexSlot;
- }
-
-#endif
- // Assembles 4 triangles. Each simdvector is a single vertex from 4
- // triangles (xxxx yyyy zzzz wwww) and there are 3 verts per triangle.
- bool Assemble(uint32_t slot, simdvector verts[]) { return this->pfnPaFunc(*this, slot, verts); }
-
-#if ENABLE_AVX512_SIMD16
- bool Assemble(uint32_t slot, simd16vector verts[])
- {
- return this->pfnPaFunc_simd16(*this, slot, verts);
- }
-
-#endif
- // Assembles 1 primitive. Each simdscalar is a vertex (xyzw).
- void AssembleSingle(uint32_t slot, uint32_t primIndex, simd4scalar verts[])
- {
- return this->pfnPaSingleFunc(*this, slot, primIndex, verts);
- }
-
- bool NextPrim()
- {
- this->pfnPaFunc = this->pfnPaNextFunc;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = this->pfnPaNextFunc_simd16;
-#endif
- this->numSimdPrims = this->nextNumSimdPrims;
- this->numPrimsComplete += this->nextNumPrimsIncrement;
- this->reset = this->nextReset;
-
- if (this->isStreaming)
- {
- this->reset = false;
- }
-
- bool morePrims = false;
-
- if (this->numSimdPrims > 0)
- {
- morePrims = true;
- this->numSimdPrims--;
- }
- else
- {
- this->counter = (this->reset) ? 0 : (this->counter + 1);
- this->reset = false;
- }
-
- if (!HasWork())
- {
- morePrims = false; // no more to do
- }
-
- return morePrims;
- }
-
- SIMDVERTEX& GetNextVsOutput()
- {
- const uint32_t numSimdVerts = streamSizeInVerts / SIMD_WIDTH;
-
- // increment cur and prev indices
- if (counter < numSimdVerts)
- {
- // prev undefined for first state
- prev = cur;
- cur = counter;
- }
- else
- {
- // swap/recycle last two simd verts for prev and cur, leave other simd verts intact in
- // the buffer
- uint32_t temp = prev;
-
- prev = cur;
- cur = temp;
- }
-
- SWR_ASSERT(cur < numSimdVerts);
- SIMDVECTOR* pVertex = &((SIMDVECTOR*)pStreamBase)[cur * vertexStride];
-
- return *(SIMDVERTEX*)pVertex;
- }
-
- SIMDMASK& GetNextVsIndices()
- {
- // unused in optimized PA, pass tmp buffer back
- return junkIndices;
- }
-
- bool GetNextStreamOutput()
- {
- this->prev = this->cur;
- this->cur = this->counter;
-
- return HasWork();
- }
-
- uint32_t NumPrims()
- {
- return (this->numPrimsComplete + this->nextNumPrimsIncrement > this->numPrims)
- ? (SIMD_WIDTH -
- (this->numPrimsComplete + this->nextNumPrimsIncrement - this->numPrims))
- : SIMD_WIDTH;
- }
-
- void SetNextState(PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
- PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
- uint32_t numSimdPrims = 0,
- uint32_t numPrimsIncrement = 0,
- bool reset = false)
- {
- this->pfnPaNextFunc = pfnPaNextFunc;
- this->nextNumSimdPrims = numSimdPrims;
- this->nextNumPrimsIncrement = numPrimsIncrement;
- this->nextReset = reset;
-
- this->pfnPaSingleFunc = pfnPaNextSingleFunc;
- }
-
-#if ENABLE_AVX512_SIMD16
- void SetNextState_simd16(PA_STATE_OPT::PFN_PA_FUNC_SIMD16 pfnPaNextFunc_simd16,
- PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
- PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
- uint32_t numSimdPrims = 0,
- uint32_t numPrimsIncrement = 0,
- bool reset = false)
- {
- this->pfnPaNextFunc_simd16 = pfnPaNextFunc_simd16;
- this->pfnPaNextFunc = pfnPaNextFunc;
- this->nextNumSimdPrims = numSimdPrims;
- this->nextNumPrimsIncrement = numPrimsIncrement;
- this->nextReset = reset;
-
- this->pfnPaSingleFunc = pfnPaNextSingleFunc;
- }
-
-#endif
- void Reset()
- {
-#if ENABLE_AVX512_SIMD16
- useAlternateOffset = false;
-
-#endif
- this->pfnPaFunc = this->pfnPaFuncReset;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = this->pfnPaFuncReset_simd16;
-#endif
- this->numPrimsComplete = 0;
- this->numSimdPrims = 0;
- this->cur = 0;
- this->prev = 0;
- this->counter = 0;
- this->reset = false;
- }
-
- SIMDSCALARI GetPrimID(uint32_t startID)
- {
-#if USE_SIMD16_FRONTEND
- return _simd16_add_epi32(
- this->primID,
- _simd16_set1_epi32(startID + this->primIDIncr * (this->numPrimsComplete / SIMD_WIDTH)));
-#else
- return _simd_add_epi32(
- this->primID,
- _simd_set1_epi32(startID + this->primIDIncr * (this->numPrimsComplete / SIMD_WIDTH)));
-#endif
- }
-};
-
-// helper C wrappers to avoid having to rewrite all the PA topology state functions
-INLINE void SetNextPaState(PA_STATE_OPT& pa,
- PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
- PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
- uint32_t numSimdPrims = 0,
- uint32_t numPrimsIncrement = 0,
- bool reset = false)
-{
- return pa.SetNextState(
- pfnPaNextFunc, pfnPaNextSingleFunc, numSimdPrims, numPrimsIncrement, reset);
-}
-
-#if ENABLE_AVX512_SIMD16
-INLINE void SetNextPaState_simd16(PA_STATE_OPT& pa,
- PA_STATE_OPT::PFN_PA_FUNC_SIMD16 pfnPaNextFunc_simd16,
- PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
- PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
- uint32_t numSimdPrims = 0,
- uint32_t numPrimsIncrement = 0,
- bool reset = false)
-{
- return pa.SetNextState_simd16(pfnPaNextFunc_simd16,
- pfnPaNextFunc,
- pfnPaNextSingleFunc,
- numSimdPrims,
- numPrimsIncrement,
- reset);
-}
-
-#endif
-INLINE simdvector& PaGetSimdVector(PA_STATE& pa, uint32_t index, uint32_t slot)
-{
- return pa.GetSimdVector(index, slot);
-}
-
-#if ENABLE_AVX512_SIMD16
-INLINE simd16vector& PaGetSimdVector_simd16(PA_STATE& pa, uint32_t index, uint32_t slot)
-{
- return pa.GetSimdVector_simd16(index, slot);
-}
-
-#endif
-// Cut-aware primitive assembler.
-struct PA_STATE_CUT : public PA_STATE
-{
- SIMDMASK* pCutIndices{nullptr}; // cut indices buffer, 1 bit per vertex
- uint32_t numVerts{0}; // number of vertices available in buffer store
- uint32_t numAttribs{0}; // number of attributes
- int32_t numRemainingVerts{0}; // number of verts remaining to be assembled
- uint32_t numVertsToAssemble{0}; // total number of verts to assemble for the draw
-#if ENABLE_AVX512_SIMD16
- OSALIGNSIMD16(uint32_t)
- indices[MAX_NUM_VERTS_PER_PRIM][SIMD_WIDTH]; // current index buffer for gather
-#else
- OSALIGNSIMD(uint32_t)
- indices[MAX_NUM_VERTS_PER_PRIM][SIMD_WIDTH]; // current index buffer for gather
-#endif
- SIMDSCALARI vOffsets[MAX_NUM_VERTS_PER_PRIM]; // byte offsets for currently assembling simd
- uint32_t numPrimsAssembled{0}; // number of primitives that are fully assembled
- uint32_t headVertex{0}; // current unused vertex slot in vertex buffer store
- uint32_t tailVertex{0}; // beginning vertex currently assembling
- uint32_t curVertex{0}; // current unprocessed vertex
- uint32_t startPrimId{0}; // starting prim id
- SIMDSCALARI vPrimId; // vector of prim ID
- bool needOffsets{false}; // need to compute gather offsets for current SIMD
- uint32_t vertsPerPrim{0};
- bool processCutVerts{
- false}; // vertex indices with cuts should be processed as normal, otherwise they
- // are ignored. Fetch shader sends invalid verts on cuts that should be ignored
- // while the GS sends valid verts for every index
-
- simdvector junkVector; // junk simdvector for unimplemented API
-#if ENABLE_AVX512_SIMD16
- simd16vector junkVector_simd16; // junk simd16vector for unimplemented API
-#endif
-
- // Topology state tracking
- uint32_t vert[MAX_NUM_VERTS_PER_PRIM];
- uint32_t curIndex{0};
- bool reverseWinding{false}; // indicates reverse winding for strips
- int32_t adjExtraVert{0}; // extra vert uses for tristrip w/ adj
-
- typedef void (PA_STATE_CUT::*PFN_PA_FUNC)(uint32_t vert, bool finish);
- PFN_PA_FUNC pfnPa{nullptr}; // per-topology function that processes a single vert
-
- PA_STATE_CUT() {}
- PA_STATE_CUT(DRAW_CONTEXT* pDC,
- uint8_t* in_pStream,
- uint32_t in_streamSizeInVerts,
- uint32_t in_vertexStride,
- SIMDMASK* in_pIndices,
- uint32_t in_numVerts,
- uint32_t in_numAttribs,
- PRIMITIVE_TOPOLOGY topo,
- bool in_processCutVerts,
- uint32_t in_numVertsPerPrim) :
- PA_STATE(pDC, in_pStream, in_streamSizeInVerts, in_vertexStride, in_numVertsPerPrim)
- {
- numVerts = in_streamSizeInVerts;
- numAttribs = in_numAttribs;
- binTopology = topo;
- needOffsets = false;
- processCutVerts = in_processCutVerts;
-
- numVertsToAssemble = numRemainingVerts = in_numVerts;
- numPrimsAssembled = 0;
- headVertex = tailVertex = curVertex = 0;
-
- curIndex = 0;
- pCutIndices = in_pIndices;
- memset(indices, 0, sizeof(indices));
-#if USE_SIMD16_FRONTEND
- vPrimId = _simd16_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-#else
- vPrimId = _simd_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
-#endif
- reverseWinding = false;
- adjExtraVert = -1;
-
- bool gsEnabled = pDC->pState->state.gsState.gsEnable;
- vertsPerPrim = NumVertsPerPrim(topo, gsEnabled);
-
- switch (topo)
- {
- case TOP_TRIANGLE_LIST:
- pfnPa = &PA_STATE_CUT::ProcessVertTriList;
- break;
- case TOP_TRI_LIST_ADJ:
- pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertTriListAdj
- : &PA_STATE_CUT::ProcessVertTriListAdjNoGs;
- break;
- case TOP_TRIANGLE_STRIP:
- pfnPa = &PA_STATE_CUT::ProcessVertTriStrip;
- break;
- case TOP_TRI_STRIP_ADJ:
- if (gsEnabled)
- {
- pfnPa = &PA_STATE_CUT::ProcessVertTriStripAdj<true>;
- }
- else
- {
- pfnPa = &PA_STATE_CUT::ProcessVertTriStripAdj<false>;
- }
- break;
-
- case TOP_POINT_LIST:
- pfnPa = &PA_STATE_CUT::ProcessVertPointList;
- break;
- case TOP_LINE_LIST:
- pfnPa = &PA_STATE_CUT::ProcessVertLineList;
- break;
- case TOP_LINE_LIST_ADJ:
- pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineListAdj
- : &PA_STATE_CUT::ProcessVertLineListAdjNoGs;
- break;
- case TOP_LINE_STRIP:
- pfnPa = &PA_STATE_CUT::ProcessVertLineStrip;
- break;
- case TOP_LISTSTRIP_ADJ:
- pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineStripAdj
- : &PA_STATE_CUT::ProcessVertLineStripAdjNoGs;
- break;
- case TOP_RECT_LIST:
- pfnPa = &PA_STATE_CUT::ProcessVertRectList;
- break;
- default:
- assert(0 && "Unimplemented topology");
- }
- }
-
- SIMDVERTEX& GetNextVsOutput()
- {
- uint32_t vertexIndex = this->headVertex / SIMD_WIDTH;
- this->headVertex = (this->headVertex + SIMD_WIDTH) % this->numVerts;
- this->needOffsets = true;
- SIMDVECTOR* pVertex = &((SIMDVECTOR*)pStreamBase)[vertexIndex * vertexStride];
-
- return *(SIMDVERTEX*)pVertex;
- }
-
- SIMDMASK& GetNextVsIndices()
- {
- uint32_t vertexIndex = this->headVertex / SIMD_WIDTH;
- SIMDMASK* pCurCutIndex = this->pCutIndices + vertexIndex;
- return *pCurCutIndex;
- }
-
- simdvector& GetSimdVector(uint32_t index, uint32_t slot)
- {
- // unused
- SWR_ASSERT(0 && "Not implemented");
- return junkVector;
- }
-
-#if ENABLE_AVX512_SIMD16
- simd16vector& GetSimdVector_simd16(uint32_t index, uint32_t slot)
- {
- // unused
- SWR_ASSERT(0 && "Not implemented");
- return junkVector_simd16;
- }
-
-#endif
- bool GetNextStreamOutput()
- {
- this->headVertex += SIMD_WIDTH;
- this->needOffsets = true;
- return HasWork();
- }
-
- SIMDSCALARI GetPrimID(uint32_t startID)
- {
-#if USE_SIMD16_FRONTEND
- return _simd16_add_epi32(_simd16_set1_epi32(startID), this->vPrimId);
-#else
- return _simd_add_epi32(_simd_set1_epi32(startID), this->vPrimId);
-#endif
- }
-
- void Reset()
- {
-#if ENABLE_AVX512_SIMD16
- useAlternateOffset = false;
-
-#endif
- this->numRemainingVerts = this->numVertsToAssemble;
- this->numPrimsAssembled = 0;
- this->curIndex = 0;
- this->curVertex = 0;
- this->tailVertex = 0;
- this->headVertex = 0;
- this->reverseWinding = false;
- this->adjExtraVert = -1;
-#if USE_SIMD16_FRONTEND
- this->vPrimId = _simd16_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-#else
- this->vPrimId = _simd_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
-#endif
- }
-
- bool HasWork() { return this->numRemainingVerts > 0 || this->adjExtraVert != -1; }
-
- bool IsVertexStoreFull()
- {
- return ((this->headVertex + SIMD_WIDTH) % this->numVerts) == this->tailVertex;
- }
-
- void RestartTopology()
- {
- this->curIndex = 0;
- this->reverseWinding = false;
- this->adjExtraVert = -1;
- }
-
- bool IsCutIndex(uint32_t vertex)
- {
- uint32_t vertexIndex = vertex / SIMD_WIDTH;
- uint32_t vertexOffset = vertex & (SIMD_WIDTH - 1);
- return CheckBit(this->pCutIndices[vertexIndex], vertexOffset);
- }
-
- // iterates across the unprocessed verts until we hit the end or we
- // have assembled SIMD prims
- void ProcessVerts()
- {
- while (this->numPrimsAssembled != SIMD_WIDTH && this->numRemainingVerts > 0 &&
- this->curVertex != this->headVertex)
- {
- // if cut index, restart topology
- if (IsCutIndex(this->curVertex))
- {
- if (this->processCutVerts)
- {
- (this->*pfnPa)(this->curVertex, false);
- }
- // finish off tri strip w/ adj before restarting topo
- if (this->adjExtraVert != -1)
- {
- (this->*pfnPa)(this->curVertex, true);
- }
- RestartTopology();
- }
- else
- {
- (this->*pfnPa)(this->curVertex, false);
- }
-
- this->curVertex++;
- if (this->curVertex >= this->numVerts)
- {
- this->curVertex = 0;
- }
- this->numRemainingVerts--;
- }
-
- // special case last primitive for tri strip w/ adj
- if (this->numPrimsAssembled != SIMD_WIDTH && this->numRemainingVerts == 0 &&
- this->adjExtraVert != -1)
- {
- (this->*pfnPa)(this->curVertex, true);
- }
- }
-
- void Advance()
- {
- // done with current batch
- // advance tail to the current unsubmitted vertex
- this->tailVertex = this->curVertex;
- this->numPrimsAssembled = 0;
-#if USE_SIMD16_FRONTEND
- this->vPrimId = _simd16_add_epi32(vPrimId, _simd16_set1_epi32(SIMD_WIDTH));
-#else
- this->vPrimId = _simd_add_epi32(vPrimId, _simd_set1_epi32(SIMD_WIDTH));
-#endif
- }
-
- bool NextPrim()
- {
- // if we've assembled enough prims, we can advance to the next set of verts
- if (this->numPrimsAssembled == SIMD_WIDTH || this->numRemainingVerts <= 0)
- {
- Advance();
- }
- return false;
- }
-
- void ComputeOffsets()
- {
- for (uint32_t v = 0; v < this->vertsPerPrim; ++v)
- {
- uint32_t vertexStrideBytes = vertexStride * sizeof(SIMDVECTOR);
- SIMDSCALARI vIndices = *(SIMDSCALARI*)&this->indices[v][0];
-
- // step to simdvertex batch
- const uint32_t simdShift = SIMD_WIDTH_LOG2;
-#if USE_SIMD16_FRONTEND
- SIMDSCALARI vVertexBatch = _simd16_srai_epi32(vIndices, simdShift);
- this->vOffsets[v] =
- _simd16_mullo_epi32(vVertexBatch, _simd16_set1_epi32(vertexStrideBytes));
-#else
- SIMDSCALARI vVertexBatch = _simd_srai_epi32(vIndices, simdShift);
- this->vOffsets[v] =
- _simd_mullo_epi32(vVertexBatch, _simd_set1_epi32(vertexStrideBytes));
-#endif
-
- // step to index
- const uint32_t simdMask = SIMD_WIDTH - 1;
-#if USE_SIMD16_FRONTEND
- SIMDSCALARI vVertexIndex = _simd16_and_si(vIndices, _simd16_set1_epi32(simdMask));
- this->vOffsets[v] = _simd16_add_epi32(
- this->vOffsets[v],
- _simd16_mullo_epi32(vVertexIndex, _simd16_set1_epi32(sizeof(float))));
-#else
- SIMDSCALARI vVertexIndex = _simd_and_si(vIndices, _simd_set1_epi32(simdMask));
- this->vOffsets[v] =
- _simd_add_epi32(this->vOffsets[v],
- _simd_mullo_epi32(vVertexIndex, _simd_set1_epi32(sizeof(float))));
-#endif
- }
- }
-
- bool Assemble(uint32_t slot, simdvector* verts)
- {
- // process any outstanding verts
- ProcessVerts();
-
- // return false if we don't have enough prims assembled
- if (this->numPrimsAssembled != SIMD_WIDTH && this->numRemainingVerts > 0)
- {
- return false;
- }
-
- // cache off gather offsets given the current SIMD set of indices the first time we get an
- // assemble
- if (this->needOffsets)
- {
- ComputeOffsets();
- this->needOffsets = false;
- }
-
- for (uint32_t v = 0; v < this->vertsPerPrim; ++v)
- {
- SIMDSCALARI offsets = this->vOffsets[v];
-
- // step to attribute
-#if USE_SIMD16_FRONTEND
- offsets = _simd16_add_epi32(offsets, _simd16_set1_epi32(slot * sizeof(SIMDVECTOR)));
-#else
- offsets = _simd_add_epi32(offsets, _simd_set1_epi32(slot * sizeof(SIMDVECTOR)));
-#endif
-
- float* pBase = (float*)this->pStreamBase;
- for (uint32_t c = 0; c < 4; ++c)
- {
-#if USE_SIMD16_FRONTEND
- simd16scalar temp = _simd16_i32gather_ps(pBase, offsets, 1);
-
- // Assigning to a temporary first to avoid an MSVC 2017 compiler bug
- simdscalar t =
- useAlternateOffset ? _simd16_extract_ps(temp, 1) : _simd16_extract_ps(temp, 0);
- verts[v].v[c] = t;
-#else
- verts[v].v[c] = _simd_i32gather_ps(pBase, offsets, 1);
-#endif
-
- // move base to next component
- pBase += SIMD_WIDTH;
- }
- }
-
- // compute the implied 4th vertex, v3
- if (this->binTopology == TOP_RECT_LIST)
- {
- for (uint32_t c = 0; c < 4; ++c)
- {
- // v1, v3 = v1 + v2 - v0, v2
- // v1 stored in verts[0], v0 stored in verts[1], v2 stored in verts[2]
- simd16scalar temp = _simd16_add_ps(verts[0].v[c], verts[2].v[c]);
- temp = _simd16_sub_ps(temp, verts[1].v[c]);
- temp = _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010
- verts[1].v[c] = _simd16_extract_ps(temp, 0);
- }
- }
-
- return true;
- }
-
-#if ENABLE_AVX512_SIMD16
- bool Assemble(uint32_t slot, simd16vector verts[])
- {
- // process any outstanding verts
- ProcessVerts();
-
- // return false if we don't have enough prims assembled
- if (this->numPrimsAssembled != SIMD_WIDTH && this->numRemainingVerts > 0)
- {
- return false;
- }
-
- // cache off gather offsets given the current SIMD set of indices the first time we get an
- // assemble
- if (this->needOffsets)
- {
- ComputeOffsets();
- this->needOffsets = false;
- }
-
- for (uint32_t v = 0; v < this->vertsPerPrim; ++v)
- {
- SIMDSCALARI offsets = this->vOffsets[v];
-
- // step to attribute
-#if USE_SIMD16_FRONTEND
- offsets = _simd16_add_epi32(offsets, _simd16_set1_epi32(slot * sizeof(SIMDVECTOR)));
-#else
- offsets = _simd_add_epi32(offsets, _simd_set1_epi32(slot * sizeof(simdvector)));
-#endif
-
- float* pBase = (float*)this->pStreamBase;
- for (uint32_t c = 0; c < 4; ++c)
- {
-#if USE_SIMD16_FRONTEND
- verts[v].v[c] = _simd16_i32gather_ps(pBase, offsets, 1);
-#else
- verts[v].v[c] = _simd16_insert_ps(
- _simd16_setzero_ps(), _simd_i32gather_ps(pBase, offsets, 1), 0);
-#endif
-
- // move base to next component
- pBase += SIMD_WIDTH;
- }
- }
-
- // compute the implied 4th vertex, v3
- if (this->binTopology == TOP_RECT_LIST)
- {
- for (uint32_t c = 0; c < 4; ++c)
- {
- // v1, v3 = v1 + v2 - v0, v2
- // v1 stored in verts[0], v0 stored in verts[1], v2 stored in verts[2]
- simd16scalar temp = _simd16_add_ps(verts[0].v[c], verts[2].v[c]);
- temp = _simd16_sub_ps(temp, verts[1].v[c]);
- verts[1].v[c] =
- _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010
- }
- }
-
- return true;
- }
-
-#endif
- void AssembleSingle(uint32_t slot, uint32_t triIndex, simd4scalar tri[3])
- {
- // move to slot
- for (uint32_t v = 0; v < this->vertsPerPrim; ++v)
- {
- uint32_t* pOffset = (uint32_t*)&this->vOffsets[v];
-#if USE_SIMD16_FRONTEND
- uint32_t offset =
- useAlternateOffset ? pOffset[triIndex + SIMD_WIDTH_DIV2] : pOffset[triIndex];
-#else
- uint32_t offset = pOffset[triIndex];
-#endif
- offset += sizeof(SIMDVECTOR) * slot;
- float* pVert = (float*)&tri[v];
- for (uint32_t c = 0; c < 4; ++c)
- {
- float* pComponent = (float*)(this->pStreamBase + offset);
- pVert[c] = *pComponent;
- offset += SIMD_WIDTH * sizeof(float);
- }
- }
-
- // compute the implied 4th vertex, v3
- if ((this->binTopology == TOP_RECT_LIST) && (triIndex % 2 == 1))
- {
- // v1, v3 = v1 + v2 - v0, v2
- // v1 stored in tri[0], v0 stored in tri[1], v2 stored in tri[2]
- float* pVert0 = (float*)&tri[1];
- float* pVert1 = (float*)&tri[0];
- float* pVert2 = (float*)&tri[2];
- float* pVert3 = (float*)&tri[1];
- for (uint32_t c = 0; c < 4; ++c)
- {
- pVert3[c] = pVert1[c] + pVert2[c] - pVert0[c];
- }
- }
- }
-
- uint32_t NumPrims() { return this->numPrimsAssembled; }
-
- // Per-topology functions
- void ProcessVertTriStrip(uint32_t index, bool finish)
- {
- this->vert[this->curIndex] = index;
- this->curIndex++;
- if (this->curIndex == 3)
- {
- // assembled enough verts for prim, add to gather indices
- this->indices[0][this->numPrimsAssembled] = this->vert[0];
- if (reverseWinding)
- {
- this->indices[1][this->numPrimsAssembled] = this->vert[2];
- this->indices[2][this->numPrimsAssembled] = this->vert[1];
- }
- else
- {
- this->indices[1][this->numPrimsAssembled] = this->vert[1];
- this->indices[2][this->numPrimsAssembled] = this->vert[2];
- }
-
- // increment numPrimsAssembled
- this->numPrimsAssembled++;
-
- // set up next prim state
- this->vert[0] = this->vert[1];
- this->vert[1] = this->vert[2];
- this->curIndex = 2;
- this->reverseWinding ^= 1;
- }
- }
-
- template <bool gsEnabled>
- void AssembleTriStripAdj()
- {
- if (!gsEnabled)
- {
- this->vert[1] = this->vert[2];
- this->vert[2] = this->vert[4];
-
- this->indices[0][this->numPrimsAssembled] = this->vert[0];
- this->indices[1][this->numPrimsAssembled] = this->vert[1];
- this->indices[2][this->numPrimsAssembled] = this->vert[2];
-
- this->vert[4] = this->vert[2];
- this->vert[2] = this->vert[1];
- }
- else
- {
- this->indices[0][this->numPrimsAssembled] = this->vert[0];
- this->indices[1][this->numPrimsAssembled] = this->vert[1];
- this->indices[2][this->numPrimsAssembled] = this->vert[2];
- this->indices[3][this->numPrimsAssembled] = this->vert[3];
- this->indices[4][this->numPrimsAssembled] = this->vert[4];
- this->indices[5][this->numPrimsAssembled] = this->vert[5];
- }
- this->numPrimsAssembled++;
- }
-
- template <bool gsEnabled>
- void ProcessVertTriStripAdj(uint32_t index, bool finish)
- {
- // handle last primitive of tristrip
- if (finish && this->adjExtraVert != -1)
- {
- this->vert[3] = this->adjExtraVert;
- AssembleTriStripAdj<gsEnabled>();
- this->adjExtraVert = -1;
- return;
- }
-
- switch (this->curIndex)
- {
- case 0:
- case 1:
- case 2:
- case 4:
- this->vert[this->curIndex] = index;
- this->curIndex++;
- break;
- case 3:
- this->vert[5] = index;
- this->curIndex++;
- break;
- case 5:
- if (this->adjExtraVert == -1)
- {
- this->adjExtraVert = index;
- }
- else
- {
- this->vert[3] = index;
- if (!gsEnabled)
- {
- AssembleTriStripAdj<gsEnabled>();
-
- uint32_t nextTri[6];
- if (this->reverseWinding)
- {
- nextTri[0] = this->vert[4];
- nextTri[1] = this->vert[0];
- nextTri[2] = this->vert[2];
- nextTri[4] = this->vert[3];
- nextTri[5] = this->adjExtraVert;
- }
- else
- {
- nextTri[0] = this->vert[2];
- nextTri[1] = this->adjExtraVert;
- nextTri[2] = this->vert[3];
- nextTri[4] = this->vert[4];
- nextTri[5] = this->vert[0];
- }
- for (uint32_t i = 0; i < 6; ++i)
- {
- this->vert[i] = nextTri[i];
- }
-
- this->adjExtraVert = -1;
- this->reverseWinding ^= 1;
- }
- else
- {
- this->curIndex++;
- }
- }
- break;
- case 6:
- SWR_ASSERT(this->adjExtraVert != -1, "Algorithm failure!");
- AssembleTriStripAdj<gsEnabled>();
-
- uint32_t nextTri[6];
- if (this->reverseWinding)
- {
- nextTri[0] = this->vert[4];
- nextTri[1] = this->vert[0];
- nextTri[2] = this->vert[2];
- nextTri[4] = this->vert[3];
- nextTri[5] = this->adjExtraVert;
- }
- else
- {
- nextTri[0] = this->vert[2];
- nextTri[1] = this->adjExtraVert;
- nextTri[2] = this->vert[3];
- nextTri[4] = this->vert[4];
- nextTri[5] = this->vert[0];
- }
- for (uint32_t i = 0; i < 6; ++i)
- {
- this->vert[i] = nextTri[i];
- }
- this->reverseWinding ^= 1;
- this->adjExtraVert = index;
- this->curIndex--;
- break;
- }
- }
-
- void ProcessVertTriList(uint32_t index, bool finish)
- {
- this->vert[this->curIndex] = index;
- this->curIndex++;
- if (this->curIndex == 3)
- {
- // assembled enough verts for prim, add to gather indices
- this->indices[0][this->numPrimsAssembled] = this->vert[0];
- this->indices[1][this->numPrimsAssembled] = this->vert[1];
- this->indices[2][this->numPrimsAssembled] = this->vert[2];
-
- // increment numPrimsAssembled
- this->numPrimsAssembled++;
-
- // set up next prim state
- this->curIndex = 0;
- }
- }
-
- void ProcessVertTriListAdj(uint32_t index, bool finish)
- {
- this->vert[this->curIndex] = index;
- this->curIndex++;
- if (this->curIndex == 6)
- {
- // assembled enough verts for prim, add to gather indices
- this->indices[0][this->numPrimsAssembled] = this->vert[0];
- this->indices[1][this->numPrimsAssembled] = this->vert[1];
- this->indices[2][this->numPrimsAssembled] = this->vert[2];
- this->indices[3][this->numPrimsAssembled] = this->vert[3];
- this->indices[4][this->numPrimsAssembled] = this->vert[4];
- this->indices[5][this->numPrimsAssembled] = this->vert[5];
-
- // increment numPrimsAssembled
- this->numPrimsAssembled++;
-
- // set up next prim state
- this->curIndex = 0;
- }
- }
-
- void ProcessVertTriListAdjNoGs(uint32_t index, bool finish)
- {
- this->vert[this->curIndex] = index;
- this->curIndex++;
- if (this->curIndex == 6)
- {
- // assembled enough verts for prim, add to gather indices
- this->indices[0][this->numPrimsAssembled] = this->vert[0];
- this->indices[1][this->numPrimsAssembled] = this->vert[2];
- this->indices[2][this->numPrimsAssembled] = this->vert[4];
-
- // increment numPrimsAssembled
- this->numPrimsAssembled++;
-
- // set up next prim state
- this->curIndex = 0;
- }
- }
-
- void ProcessVertLineList(uint32_t index, bool finish)
- {
- this->vert[this->curIndex] = index;
- this->curIndex++;
- if (this->curIndex == 2)
- {
- this->indices[0][this->numPrimsAssembled] = this->vert[0];
- this->indices[1][this->numPrimsAssembled] = this->vert[1];
-
- this->numPrimsAssembled++;
- this->curIndex = 0;
- }
- }
-
- void ProcessVertLineStrip(uint32_t index, bool finish)
- {
- this->vert[this->curIndex] = index;
- this->curIndex++;
- if (this->curIndex == 2)
- {
- // assembled enough verts for prim, add to gather indices
- this->indices[0][this->numPrimsAssembled] = this->vert[0];
- this->indices[1][this->numPrimsAssembled] = this->vert[1];
-
- // increment numPrimsAssembled
- this->numPrimsAssembled++;
-
- // set up next prim state
- this->vert[0] = this->vert[1];
- this->curIndex = 1;
- }
- }
-
- void ProcessVertLineStripAdj(uint32_t index, bool finish)
- {
- this->vert[this->curIndex] = index;
- this->curIndex++;
- if (this->curIndex == 4)
- {
- // assembled enough verts for prim, add to gather indices
- this->indices[0][this->numPrimsAssembled] = this->vert[0];
- this->indices[1][this->numPrimsAssembled] = this->vert[1];
- this->indices[2][this->numPrimsAssembled] = this->vert[2];
- this->indices[3][this->numPrimsAssembled] = this->vert[3];
-
- // increment numPrimsAssembled
- this->numPrimsAssembled++;
-
- // set up next prim state
- this->vert[0] = this->vert[1];
- this->vert[1] = this->vert[2];
- this->vert[2] = this->vert[3];
- this->curIndex = 3;
- }
- }
-
- void ProcessVertLineStripAdjNoGs(uint32_t index, bool finish)
- {
- this->vert[this->curIndex] = index;
- this->curIndex++;
- if (this->curIndex == 4)
- {
- // assembled enough verts for prim, add to gather indices
- this->indices[0][this->numPrimsAssembled] = this->vert[1];
- this->indices[1][this->numPrimsAssembled] = this->vert[2];
-
- // increment numPrimsAssembled
- this->numPrimsAssembled++;
-
- // set up next prim state
- this->vert[0] = this->vert[1];
- this->vert[1] = this->vert[2];
- this->vert[2] = this->vert[3];
- this->curIndex = 3;
- }
- }
-
- void ProcessVertLineListAdj(uint32_t index, bool finish)
- {
- this->vert[this->curIndex] = index;
- this->curIndex++;
- if (this->curIndex == 4)
- {
- this->indices[0][this->numPrimsAssembled] = this->vert[0];
- this->indices[1][this->numPrimsAssembled] = this->vert[1];
- this->indices[2][this->numPrimsAssembled] = this->vert[2];
- this->indices[3][this->numPrimsAssembled] = this->vert[3];
-
- this->numPrimsAssembled++;
- this->curIndex = 0;
- }
- }
-
- void ProcessVertLineListAdjNoGs(uint32_t index, bool finish)
- {
- this->vert[this->curIndex] = index;
- this->curIndex++;
- if (this->curIndex == 4)
- {
- this->indices[0][this->numPrimsAssembled] = this->vert[1];
- this->indices[1][this->numPrimsAssembled] = this->vert[2];
-
- this->numPrimsAssembled++;
- this->curIndex = 0;
- }
- }
-
- void ProcessVertPointList(uint32_t index, bool finish)
- {
- this->vert[this->curIndex] = index;
- this->curIndex++;
- if (this->curIndex == 1)
- {
- this->indices[0][this->numPrimsAssembled] = this->vert[0];
- this->numPrimsAssembled++;
- this->curIndex = 0;
- }
- }
-
- void ProcessVertRectList(uint32_t index, bool finish)
- {
- this->vert[this->curIndex] = index;
- this->curIndex++;
- if (this->curIndex == 3)
- {
- // assembled enough verts for prim, add to gather indices
- this->indices[0][this->numPrimsAssembled] = this->vert[0];
- this->indices[1][this->numPrimsAssembled] = this->vert[1];
- this->indices[2][this->numPrimsAssembled] = this->vert[2];
-
- // second triangle in the rectangle
- // v1, v3 = v1 + v2 - v0, v2
- this->indices[0][this->numPrimsAssembled + 1] = this->vert[1];
- this->indices[1][this->numPrimsAssembled + 1] = this->vert[0];
- this->indices[2][this->numPrimsAssembled + 1] = this->vert[2];
-
- // increment numPrimsAssembled
- this->numPrimsAssembled += 2;
-
- // set up next prim state
- this->curIndex = 0;
- }
- }
-};
-
-// Primitive Assembly for data output from the DomainShader.
-struct PA_TESS : PA_STATE
-{
- PA_TESS(DRAW_CONTEXT* in_pDC,
- const SIMDSCALAR* in_pVertData,
- uint32_t in_attributeStrideInVectors,
- uint32_t in_vertexStride,
- uint32_t in_numAttributes,
- uint32_t* (&in_ppIndices)[3],
- uint32_t in_numPrims,
- PRIMITIVE_TOPOLOGY in_binTopology,
- uint32_t numVertsPerPrim,
- bool SOA = true) :
-
- PA_STATE(in_pDC, nullptr, 0, in_vertexStride, numVertsPerPrim),
- m_pVertexData(in_pVertData), m_attributeStrideInVectors(in_attributeStrideInVectors),
- m_numAttributes(in_numAttributes), m_numPrims(in_numPrims), m_SOA(SOA)
- {
-#if USE_SIMD16_FRONTEND
- m_vPrimId = _simd16_setzero_si();
-#else
- m_vPrimId = _simd_setzero_si();
-#endif
- binTopology = in_binTopology;
- m_ppIndices[0] = in_ppIndices[0];
- m_ppIndices[1] = in_ppIndices[1];
- m_ppIndices[2] = in_ppIndices[2];
-
- switch (binTopology)
- {
- case TOP_POINT_LIST:
- m_numVertsPerPrim = 1;
- break;
-
- case TOP_LINE_LIST:
- m_numVertsPerPrim = 2;
- break;
-
- case TOP_TRIANGLE_LIST:
- m_numVertsPerPrim = 3;
- break;
-
- default:
- SWR_INVALID("Invalid binTopology (%d) for %s", binTopology, __FUNCTION__);
- break;
- }
- }
-
- bool HasWork() { return m_numPrims != 0; }
-
- simdvector& GetSimdVector(uint32_t index, uint32_t slot)
- {
- SWR_INVALID("%s NOT IMPLEMENTED", __FUNCTION__);
- return junkVector;
- }
-
-#if ENABLE_AVX512_SIMD16
- simd16vector& GetSimdVector_simd16(uint32_t index, uint32_t slot)
- {
- SWR_INVALID("%s NOT IMPLEMENTED", __FUNCTION__);
- return junkVector_simd16;
- }
-
-#endif
- static SIMDSCALARI GenPrimMask(uint32_t numPrims)
- {
- SWR_ASSERT(numPrims <= SIMD_WIDTH);
-#if USE_SIMD16_FRONTEND
- static const OSALIGNLINE(int32_t) maskGen[SIMD_WIDTH * 2] = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-
- return _simd16_loadu_si((const SIMDSCALARI*)&maskGen[SIMD_WIDTH - numPrims]);
-#else
- static const OSALIGNLINE(int32_t)
- maskGen[SIMD_WIDTH * 2] = {-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0};
-
- return _simd_loadu_si((const SIMDSCALARI*)&maskGen[SIMD_WIDTH - numPrims]);
-#endif
- }
-
- bool Assemble(uint32_t slot, simdvector verts[])
- {
- SWR_ASSERT(slot < m_numAttributes);
-
- uint32_t numPrimsToAssemble = PA_TESS::NumPrims();
- if (0 == numPrimsToAssemble)
- {
- return false;
- }
-
- SIMDSCALARI mask = GenPrimMask(numPrimsToAssemble);
-
- const float* pBaseAttrib;
- if (m_SOA)
- {
- pBaseAttrib = (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
- }
- else
- {
- const float* pVertData = (const float*)m_pVertexData;
- pBaseAttrib = pVertData + slot * 4;
- }
-
- for (uint32_t i = 0; i < m_numVertsPerPrim; ++i)
- {
-#if USE_SIMD16_FRONTEND
- SIMDSCALARI indices = _simd16_load_si((const SIMDSCALARI*)m_ppIndices[i]);
-#else
- SIMDSCALARI indices = _simd_load_si((const SIMDSCALARI*)m_ppIndices[i]);
-#endif
-
- const float* pBase = pBaseAttrib;
- for (uint32_t c = 0; c < 4; ++c)
- {
-#if USE_SIMD16_FRONTEND
- simd16scalar temp =
- _simd16_mask_i32gather_ps(_simd16_setzero_ps(),
- pBase,
- indices,
- _simd16_castsi_ps(mask),
- 4 /* gcc doesn't like sizeof(float) */);
-
- verts[i].v[c] =
- useAlternateOffset ? _simd16_extract_ps(temp, 1) : _simd16_extract_ps(temp, 0);
-#else
- verts[i].v[c] = _simd_mask_i32gather_ps(_simd_setzero_ps(),
- pBase,
- indices,
- _simd_castsi_ps(mask),
- 4); // gcc doesn't like sizeof(float)
-#endif
- if (m_SOA)
- {
- pBase += m_attributeStrideInVectors * SIMD_WIDTH;
- }
- else
- {
- pBase += sizeof(float);
- }
- }
- }
-
- return true;
- }
-
-#if ENABLE_AVX512_SIMD16
- /**
-  * @brief Gathers one attribute for the current batch of primitives into
-  *        simd16vector outputs, one vector per vertex of the primitive.
-  * @param slot  attribute slot to fetch; asserted to be below m_numAttributes.
-  * @param verts receives m_numVertsPerPrim vectors of four components each.
-  * @return false when PA_TESS::NumPrims() reports nothing left to assemble,
-  *         true otherwise.
-  */
- bool Assemble(uint32_t slot, simd16vector verts[])
- {
-     SWR_ASSERT(slot < m_numAttributes);
-
-     const uint32_t primCount = PA_TESS::NumPrims();
-     if (primCount == 0)
-     {
-         return false;
-     }
-
-     // Lanes beyond primCount are masked off in the gathers below.
-     SIMDSCALARI activeLanes = GenPrimMask(primCount);
-
-     // Address of component 0 of the requested attribute; the layout depends on m_SOA.
-     const float* pAttrib =
-         m_SOA ? (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4]
-               : (const float*)m_pVertexData + slot * 4;
-
-     for (uint32_t vert = 0; vert < m_numVertsPerPrim; ++vert)
-     {
-#if USE_SIMD16_FRONTEND
-         SIMDSCALARI vertIdx = _simd16_load_si((const SIMDSCALARI*)m_ppIndices[vert]);
-         if (!m_SOA)
-         {
-             // presumably converts vertex indices to float offsets (stride / 4) - confirm
-             vertIdx = _simd16_mullo_epi32(vertIdx, _simd16_set1_epi32(vertexStride / 4));
-         }
-#else
-         SIMDSCALARI vertIdx = _simd_load_si((const SIMDSCALARI*)m_ppIndices[vert]);
-#endif
-
-         const float* pComp = pAttrib;
-         for (uint32_t comp = 0; comp < 4; ++comp)
-         {
-#if USE_SIMD16_FRONTEND
-             verts[vert].v[comp] =
-                 _simd16_mask_i32gather_ps(_simd16_setzero_ps(),
-                                           pComp,
-                                           vertIdx,
-                                           _simd16_castsi_ps(activeLanes),
-                                           4 /* gcc doesn't like sizeof(float) */);
-#else
-             // 8-wide gather, widened by placing it in half 0 of a zeroed 16-wide register
-             simdscalar lowHalf = _simd_mask_i32gather_ps(_simd_setzero_ps(),
-                                                          pComp,
-                                                          vertIdx,
-                                                          _simd_castsi_ps(activeLanes),
-                                                          4 /* gcc doesn't like sizeof(float) */);
-             verts[vert].v[comp] = _simd16_insert_ps(_simd16_setzero_ps(), lowHalf, 0);
-#endif
-             // advance to the next component of the same attribute
-             pComp += m_SOA ? (m_attributeStrideInVectors * SIMD_WIDTH) : 1;
-         }
-     }
-
-     return true;
- }
-
-#endif
- /**
-  * @brief Scalar path: copies one attribute of a single primitive, writing four
-  *        floats per vertex into verts[].
-  * @param slot      attribute slot; asserted to be below m_numAttributes.
-  * @param primIndex primitive within the current batch; asserted to be below
-  *                  PA_TESS::NumPrims().
-  * @param verts     receives m_numVertsPerPrim entries.
-  */
- void AssembleSingle(uint32_t slot, uint32_t primIndex, simd4scalar verts[])
- {
-     SWR_ASSERT(slot < m_numAttributes);
-     SWR_ASSERT(primIndex < PA_TESS::NumPrims());
-
-     // Address of component 0 of the requested attribute; the layout depends on m_SOA.
-     const float* pAttrib =
-         m_SOA ? (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4]
-               : (const float*)m_pVertexData + slot * 4;
-
-     for (uint32_t vert = 0; vert < m_numVertsPerPrim; ++vert)
-     {
-#if USE_SIMD16_FRONTEND
-         // with useAlternateOffset set, the primitive is read SIMD_WIDTH_DIV2 entries further in
-         uint32_t srcIdx =
-             m_ppIndices[vert][useAlternateOffset ? primIndex + SIMD_WIDTH_DIV2 : primIndex];
-         if (!m_SOA)
-         {
-             srcIdx *= (vertexStride / 4);
-         }
-#else
-         uint32_t srcIdx = m_ppIndices[vert][primIndex];
-#endif
-         const float* pSrc = pAttrib;
-         float*       pDst = (float*)&verts[vert];
-
-         for (uint32_t comp = 0; comp < 4; ++comp)
-         {
-             pDst[comp] = pSrc[srcIdx];
-             // advance to the next component of the same attribute
-             pSrc += m_SOA ? (m_attributeStrideInVectors * SIMD_WIDTH) : 1;
-         }
-     }
- }
-
- /**
-  * @brief Retires the batch just assembled: subtracts it from the remaining
-  *        primitive count and advances all three index pointers past it.
-  * @return the result of HasWork() after the update.
-  */
- bool NextPrim()
- {
-     const uint32_t consumed = PA_TESS::NumPrims();
-
-     m_numPrims -= consumed;
-     for (uint32_t*& pIndices : m_ppIndices)
-     {
-         pIndices += consumed;
-     }
-
-     return HasWork();
- }
-
- /** Not implemented for this assembler: invokes SWR_NOT_IMPL and returns the junkVertex placeholder. */
- SIMDVERTEX& GetNextVsOutput()
- {
-     SWR_NOT_IMPL;
-     return junkVertex;
- }
-
- /** Not implemented for this assembler: invokes SWR_NOT_IMPL and always returns false. */
- bool GetNextStreamOutput()
- {
-     SWR_NOT_IMPL;
-     return false;
- }
-
- /** Not implemented for this assembler: invokes SWR_NOT_IMPL and returns the junkIndices placeholder. */
- SIMDMASK& GetNextVsIndices()
- {
-     SWR_NOT_IMPL;
-     return junkIndices;
- }
-
- /** Number of primitives in the current batch: the remaining count, capped at SIMD_WIDTH. */
- uint32_t NumPrims()
- {
-     const uint32_t batchLimit = SIMD_WIDTH;
-     return (batchLimit < m_numPrims) ? batchLimit : m_numPrims;
- }
-
- /** Not implemented for this assembler: only invokes SWR_NOT_IMPL. */
- void Reset()
- {
-     SWR_NOT_IMPL;
- }
-
- /**
-  * @brief Returns per-lane primitive IDs: startID broadcast to every lane,
-  *        added to the stored m_vPrimId vector.
-  * @param startID value added to each lane of m_vPrimId.
-  */
- SIMDSCALARI GetPrimID(uint32_t startID)
- {
-#if USE_SIMD16_FRONTEND
-     const auto vStart = _simd16_set1_epi32(startID);
-     return _simd16_add_epi32(vStart, m_vPrimId);
-#else
-     const auto vStart = _simd_set1_epi32(startID);
-     return _simd_add_epi32(vStart, m_vPrimId);
-#endif
- }
-
-private:
- const SIMDSCALAR* m_pVertexData = nullptr;
- uint32_t m_attributeStrideInVectors = 0;
- uint32_t m_numAttributes = 0;
- uint32_t m_numPrims = 0;
- uint32_t* m_ppIndices[3];
-
- uint32_t m_numVertsPerPrim = 0;
-
- SIMDSCALARI m_vPrimId;
-
- simdvector junkVector; // junk simdvector for unimplemented API
-#if ENABLE_AVX512_SIMD16
- simd16vector junkVector_simd16; // junk simd16vector for unimplemented API
-#endif
- SIMDVERTEX junkVertex; // junk SIMDVERTEX for unimplemented API
- SIMDMASK junkIndices; // temporary index store for unused virtual function
-
- bool m_SOA;
-};
-
-// Primitive Assembler factory class, responsible for creating and initializing the correct
-// assembler based on state.
-template <typename IsIndexedT, typename IsCutIndexEnabledT>
-struct PA_FACTORY
-{ /* The constructor constructs one of the two assembler members in place and records
-   * the choice in cutPA; GetPA() then returns that member as a PA_STATE reference.
-   */
- PA_FACTORY(DRAW_CONTEXT* pDC,
- PRIMITIVE_TOPOLOGY in_topo,
- uint32_t numVerts,
- PA_STATE::SIMDVERTEX* pVertexStore,
- uint32_t vertexStoreSize,
- uint32_t vertexStride,
- uint32_t numVertsPerPrim) :
- topo(in_topo)
- {
-#if KNOB_ENABLE_CUT_AWARE_PA == TRUE // the cut-aware branch is compiled in only when this knob is TRUE
- const API_STATE& state = GetApiState(pDC);
- if ((IsIndexedT::value && IsCutIndexEnabledT::value && // indexed draw with cut index enabled...
- (topo == TOP_TRIANGLE_STRIP || topo == TOP_POINT_LIST || topo == TOP_LINE_LIST || // ...on one of these topologies
- topo == TOP_LINE_STRIP || topo == TOP_TRIANGLE_LIST)) ||
-
- // non-indexed draws with adjacency topologies must use cut-aware PA until we add
- // support for them in the optimized PA
- (topo == TOP_LINE_LIST_ADJ || topo == TOP_LISTSTRIP_ADJ || topo == TOP_TRI_LIST_ADJ ||
- topo == TOP_TRI_STRIP_ADJ))
- {
- memset(&indexStore, 0, sizeof(indexStore)); // the cut-aware assembler starts from a zeroed index store
- uint32_t numAttribs = state.feNumAttributes;
-
- new (&this->paCut) PA_STATE_CUT(pDC,
- reinterpret_cast<uint8_t*>(pVertexStore),
- vertexStoreSize * PA_STATE::SIMD_WIDTH,
- vertexStride,
- &this->indexStore[0],
- numVerts,
- numAttribs,
- state.topology, // NOTE(review): passes state.topology rather than in_topo/topo - confirm they always agree
- false,
- numVertsPerPrim);
- cutPA = true;
- }
- else
-#endif
- { // optimized assembler; when the knob is off this is the only path
- uint32_t numPrims = GetNumPrims(in_topo, numVerts);
- new (&this->paOpt) PA_STATE_OPT(pDC,
- numPrims,
- reinterpret_cast<uint8_t*>(pVertexStore),
- vertexStoreSize * PA_STATE::SIMD_WIDTH,
- vertexStride,
- false,
- numVertsPerPrim);
- cutPA = false;
- }
- }
-
- PA_STATE& GetPA() // returns whichever assembler the constructor selected
- {
-#if KNOB_ENABLE_CUT_AWARE_PA == TRUE
- if (cutPA)
- {
- return this->paCut;
- }
- else
-#endif
- {
- return this->paOpt;
- }
- }
-
- PA_STATE_OPT paOpt; // NOTE(review): paOpt and paCut are ordinary members, so both are default-constructed
- PA_STATE_CUT paCut; // before the constructor body re-constructs one via placement new - confirm intended
-
- bool cutPA{false}; // true when paCut is the active assembler
-
- PRIMITIVE_TOPOLOGY topo{TOP_UNKNOWN}; // set from in_topo by the constructor
-
- PA_STATE::SIMDMASK indexStore[MAX_NUM_VERTS_PER_PRIM]; // handed to PA_STATE_CUT; zeroed only on the cut-aware path
-};
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp
deleted file mode 100644
index 25d7156ac63..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp
+++ /dev/null
@@ -1,3141 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file pa_avx.cpp
- *
- * @brief AVX implementation for primitive assembly.
- * N primitives are assembled at a time, where N is the SIMD width.
- * A state machine, that is specific for a given topology, drives the
- * assembly of vertices into triangles.
- *
- ******************************************************************************/
-#include "context.h"
-#include "pa.h"
-#include "frontend.h"
-
-#if (KNOB_SIMD_WIDTH == 8)
-
-/**
- * swizzleLane0..swizzleLane7 (four-register form): given the x, y, z and w
- * components of eight vertices in SoA form, return the { x, y, z, w } of one
- * lane as a 4-wide vector.
- *
- * Each function interleaves x with z and y with w, interleaves those two
- * results, and extracts one 128-bit half. unpacklo on the first step covers
- * lanes 0, 1, 4, 5 and unpackhi covers lanes 2, 3, 6, 7; the second step picks
- * the even or odd lane of that pair; the extracted half picks lanes 0-3 (half 0)
- * or lanes 4-7 (half 1).
- */
-INLINE simd4scalar swizzleLane0(const simdscalar& x,
-                                const simdscalar& y,
-                                const simdscalar& z,
-                                const simdscalar& w)
-{
-    const simdscalar xz   = _mm256_unpacklo_ps(x, z);
-    const simdscalar yw   = _mm256_unpacklo_ps(y, w);
-    const simdscalar xyzw = _mm256_unpacklo_ps(xz, yw);
-    return _mm256_extractf128_ps(xyzw, 0);
-}
-
-/** Lane 1: low interleave, then high interleave, half 0. */
-INLINE simd4scalar swizzleLane1(const simdscalar& x,
-                                const simdscalar& y,
-                                const simdscalar& z,
-                                const simdscalar& w)
-{
-    const simdscalar xz   = _mm256_unpacklo_ps(x, z);
-    const simdscalar yw   = _mm256_unpacklo_ps(y, w);
-    const simdscalar xyzw = _mm256_unpackhi_ps(xz, yw);
-    return _mm256_extractf128_ps(xyzw, 0);
-}
-
-/** Lane 2: high interleave, then low interleave, half 0. */
-INLINE simd4scalar swizzleLane2(const simdscalar& x,
-                                const simdscalar& y,
-                                const simdscalar& z,
-                                const simdscalar& w)
-{
-    const simdscalar xz   = _mm256_unpackhi_ps(x, z);
-    const simdscalar yw   = _mm256_unpackhi_ps(y, w);
-    const simdscalar xyzw = _mm256_unpacklo_ps(xz, yw);
-    return _mm256_extractf128_ps(xyzw, 0);
-}
-
-/** Lane 3: high interleave, then high interleave, half 0. */
-INLINE simd4scalar swizzleLane3(const simdscalar& x,
-                                const simdscalar& y,
-                                const simdscalar& z,
-                                const simdscalar& w)
-{
-    const simdscalar xz   = _mm256_unpackhi_ps(x, z);
-    const simdscalar yw   = _mm256_unpackhi_ps(y, w);
-    const simdscalar xyzw = _mm256_unpackhi_ps(xz, yw);
-    return _mm256_extractf128_ps(xyzw, 0);
-}
-
-/** Lane 4: same shuffle as lane 0, half 1. */
-INLINE simd4scalar swizzleLane4(const simdscalar& x,
-                                const simdscalar& y,
-                                const simdscalar& z,
-                                const simdscalar& w)
-{
-    const simdscalar xz   = _mm256_unpacklo_ps(x, z);
-    const simdscalar yw   = _mm256_unpacklo_ps(y, w);
-    const simdscalar xyzw = _mm256_unpacklo_ps(xz, yw);
-    return _mm256_extractf128_ps(xyzw, 1);
-}
-
-/** Lane 5: same shuffle as lane 1, half 1. */
-INLINE simd4scalar swizzleLane5(const simdscalar& x,
-                                const simdscalar& y,
-                                const simdscalar& z,
-                                const simdscalar& w)
-{
-    const simdscalar xz   = _mm256_unpacklo_ps(x, z);
-    const simdscalar yw   = _mm256_unpacklo_ps(y, w);
-    const simdscalar xyzw = _mm256_unpackhi_ps(xz, yw);
-    return _mm256_extractf128_ps(xyzw, 1);
-}
-
-/** Lane 6: same shuffle as lane 2, half 1. */
-INLINE simd4scalar swizzleLane6(const simdscalar& x,
-                                const simdscalar& y,
-                                const simdscalar& z,
-                                const simdscalar& w)
-{
-    const simdscalar xz   = _mm256_unpackhi_ps(x, z);
-    const simdscalar yw   = _mm256_unpackhi_ps(y, w);
-    const simdscalar xyzw = _mm256_unpacklo_ps(xz, yw);
-    return _mm256_extractf128_ps(xyzw, 1);
-}
-
-/** Lane 7: same shuffle as lane 3, half 1. */
-INLINE simd4scalar swizzleLane7(const simdscalar& x,
-                                const simdscalar& y,
-                                const simdscalar& z,
-                                const simdscalar& w)
-{
-    const simdscalar xz   = _mm256_unpackhi_ps(x, z);
-    const simdscalar yw   = _mm256_unpackhi_ps(y, w);
-    const simdscalar xyzw = _mm256_unpackhi_ps(xz, yw);
-    return _mm256_extractf128_ps(xyzw, 1);
-}
-
-// simdvector convenience overloads: each forwards the vector's x, y, z and w
-// members to the four-register swizzleLaneK overload of the same lane.
-INLINE simd4scalar swizzleLane0(const simdvector& v) { return swizzleLane0(v.x, v.y, v.z, v.w); }
-
-INLINE simd4scalar swizzleLane1(const simdvector& v) { return swizzleLane1(v.x, v.y, v.z, v.w); }
-
-INLINE simd4scalar swizzleLane2(const simdvector& v) { return swizzleLane2(v.x, v.y, v.z, v.w); }
-
-INLINE simd4scalar swizzleLane3(const simdvector& v) { return swizzleLane3(v.x, v.y, v.z, v.w); }
-
-INLINE simd4scalar swizzleLane4(const simdvector& v) { return swizzleLane4(v.x, v.y, v.z, v.w); }
-
-INLINE simd4scalar swizzleLane5(const simdvector& v) { return swizzleLane5(v.x, v.y, v.z, v.w); }
-
-INLINE simd4scalar swizzleLane6(const simdvector& v) { return swizzleLane6(v.x, v.y, v.z, v.w); }
-
-INLINE simd4scalar swizzleLane7(const simdvector& v) { return swizzleLane7(v.x, v.y, v.z, v.w); }
-
-/**
- * Run-time lane selection for an 8-wide vector: dispatches to swizzleLane0..7.
- * Any lane outside 0..7 yields an all-zero result.
- */
-INLINE simd4scalar swizzleLaneN(const simdvector& v, int lane)
-{
-    switch (lane)
-    {
-    case 0: return swizzleLane0(v);
-    case 1: return swizzleLane1(v);
-    case 2: return swizzleLane2(v);
-    case 3: return swizzleLane3(v);
-    case 4: return swizzleLane4(v);
-    case 5: return swizzleLane5(v);
-    case 6: return swizzleLane6(v);
-    case 7: return swizzleLane7(v);
-    default: break;
-    }
-
-    return _mm_setzero_ps();
-}
-
-#if ENABLE_AVX512_SIMD16
-// simd16vector overloads. Lanes 0-7 extract half 0 of each component and lanes
-// 8-F extract half 1; the extracted 8-wide registers are then passed to the
-// four-register swizzleLane0..7 for the lane's position within that half.
-INLINE simd4scalar swizzleLane0(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 0);
-    const simdscalar y = _simd16_extract_ps(v.y, 0);
-    const simdscalar z = _simd16_extract_ps(v.z, 0);
-    const simdscalar w = _simd16_extract_ps(v.w, 0);
-    return swizzleLane0(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLane1(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 0);
-    const simdscalar y = _simd16_extract_ps(v.y, 0);
-    const simdscalar z = _simd16_extract_ps(v.z, 0);
-    const simdscalar w = _simd16_extract_ps(v.w, 0);
-    return swizzleLane1(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLane2(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 0);
-    const simdscalar y = _simd16_extract_ps(v.y, 0);
-    const simdscalar z = _simd16_extract_ps(v.z, 0);
-    const simdscalar w = _simd16_extract_ps(v.w, 0);
-    return swizzleLane2(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLane3(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 0);
-    const simdscalar y = _simd16_extract_ps(v.y, 0);
-    const simdscalar z = _simd16_extract_ps(v.z, 0);
-    const simdscalar w = _simd16_extract_ps(v.w, 0);
-    return swizzleLane3(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLane4(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 0);
-    const simdscalar y = _simd16_extract_ps(v.y, 0);
-    const simdscalar z = _simd16_extract_ps(v.z, 0);
-    const simdscalar w = _simd16_extract_ps(v.w, 0);
-    return swizzleLane4(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLane5(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 0);
-    const simdscalar y = _simd16_extract_ps(v.y, 0);
-    const simdscalar z = _simd16_extract_ps(v.z, 0);
-    const simdscalar w = _simd16_extract_ps(v.w, 0);
-    return swizzleLane5(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLane6(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 0);
-    const simdscalar y = _simd16_extract_ps(v.y, 0);
-    const simdscalar z = _simd16_extract_ps(v.z, 0);
-    const simdscalar w = _simd16_extract_ps(v.w, 0);
-    return swizzleLane6(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLane7(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 0);
-    const simdscalar y = _simd16_extract_ps(v.y, 0);
-    const simdscalar z = _simd16_extract_ps(v.z, 0);
-    const simdscalar w = _simd16_extract_ps(v.w, 0);
-    return swizzleLane7(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLane8(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 1);
-    const simdscalar y = _simd16_extract_ps(v.y, 1);
-    const simdscalar z = _simd16_extract_ps(v.z, 1);
-    const simdscalar w = _simd16_extract_ps(v.w, 1);
-    return swizzleLane0(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLane9(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 1);
-    const simdscalar y = _simd16_extract_ps(v.y, 1);
-    const simdscalar z = _simd16_extract_ps(v.z, 1);
-    const simdscalar w = _simd16_extract_ps(v.w, 1);
-    return swizzleLane1(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLaneA(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 1);
-    const simdscalar y = _simd16_extract_ps(v.y, 1);
-    const simdscalar z = _simd16_extract_ps(v.z, 1);
-    const simdscalar w = _simd16_extract_ps(v.w, 1);
-    return swizzleLane2(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLaneB(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 1);
-    const simdscalar y = _simd16_extract_ps(v.y, 1);
-    const simdscalar z = _simd16_extract_ps(v.z, 1);
-    const simdscalar w = _simd16_extract_ps(v.w, 1);
-    return swizzleLane3(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLaneC(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 1);
-    const simdscalar y = _simd16_extract_ps(v.y, 1);
-    const simdscalar z = _simd16_extract_ps(v.z, 1);
-    const simdscalar w = _simd16_extract_ps(v.w, 1);
-    return swizzleLane4(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLaneD(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 1);
-    const simdscalar y = _simd16_extract_ps(v.y, 1);
-    const simdscalar z = _simd16_extract_ps(v.z, 1);
-    const simdscalar w = _simd16_extract_ps(v.w, 1);
-    return swizzleLane5(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLaneE(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 1);
-    const simdscalar y = _simd16_extract_ps(v.y, 1);
-    const simdscalar z = _simd16_extract_ps(v.z, 1);
-    const simdscalar w = _simd16_extract_ps(v.w, 1);
-    return swizzleLane6(x, y, z, w);
-}
-
-INLINE simd4scalar swizzleLaneF(const simd16vector& v)
-{
-    const simdscalar x = _simd16_extract_ps(v.x, 1);
-    const simdscalar y = _simd16_extract_ps(v.y, 1);
-    const simdscalar z = _simd16_extract_ps(v.z, 1);
-    const simdscalar w = _simd16_extract_ps(v.w, 1);
-    return swizzleLane7(x, y, z, w);
-}
-
-/**
- * Run-time lane selection for a 16-wide vector: dispatches to
- * swizzleLane0..swizzleLaneF. Any lane outside 0..15 yields an all-zero result.
- */
-INLINE simd4scalar swizzleLaneN(const simd16vector& v, int lane)
-{
-    switch (lane)
-    {
-    case 0: return swizzleLane0(v);
-    case 1: return swizzleLane1(v);
-    case 2: return swizzleLane2(v);
-    case 3: return swizzleLane3(v);
-    case 4: return swizzleLane4(v);
-    case 5: return swizzleLane5(v);
-    case 6: return swizzleLane6(v);
-    case 7: return swizzleLane7(v);
-    case 8: return swizzleLane8(v);
-    case 9: return swizzleLane9(v);
-    case 10: return swizzleLaneA(v);
-    case 11: return swizzleLaneB(v);
-    case 12: return swizzleLaneC(v);
-    case 13: return swizzleLaneD(v);
-    case 14: return swizzleLaneE(v);
-    case 15: return swizzleLaneF(v);
-    default: break;
-    }
-
-    return _mm_setzero_ps();
-}
-
-#endif
-bool PaTriList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-bool PaTriList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-#if ENABLE_AVX512_SIMD16
-bool PaTriList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-bool PaTriList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-bool PaTriList2_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-#endif
-void PaTriListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]);
-
-bool PaTriStrip0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-bool PaTriStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-#if ENABLE_AVX512_SIMD16
-bool PaTriStrip0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-bool PaTriStrip1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-#endif
-void PaTriStripSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]);
-
-bool PaTriFan0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-bool PaTriFan1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-#if ENABLE_AVX512_SIMD16
-bool PaTriFan0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-bool PaTriFan1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-#endif
-void PaTriFanSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]);
-
-bool PaQuadList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-bool PaQuadList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-#if ENABLE_AVX512_SIMD16
-bool PaQuadList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-bool PaQuadList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-#endif
-void PaQuadListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]);
-
-bool PaLineLoop0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-bool PaLineLoop1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-#if ENABLE_AVX512_SIMD16
-bool PaLineLoop0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-bool PaLineLoop1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-#endif
-void PaLineLoopSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]);
-
-bool PaLineList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-bool PaLineList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-#if ENABLE_AVX512_SIMD16
-bool PaLineList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-bool PaLineList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-#endif
-void PaLineListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]);
-
-bool PaLineStrip0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-bool PaLineStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-#if ENABLE_AVX512_SIMD16
-bool PaLineStrip0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-bool PaLineStrip1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-#endif
-void PaLineStripSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]);
-
-bool PaPoints0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-#if ENABLE_AVX512_SIMD16
-bool PaPoints0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-#endif
-void PaPointsSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]);
-
-bool PaRectList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-bool PaRectList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-bool PaRectList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
-#if ENABLE_AVX512_SIMD16
-bool PaRectList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-bool PaRectList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-bool PaRectList2_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
-#endif
-void PaRectListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]);
-
-/**
- * @brief Extracts every control point of one patch for one attribute.
- *
- * Writes TotalControlPoints * 4 floats to verts (four components per control
- * point), reading control point number primIndex * TotalControlPoints + cp from
- * the assembler's stored input vectors. Called once per attribute.
- *
- * @param pa        assembler state supplying the input vectors.
- * @param slot      attribute slot to read.
- * @param primIndex patch index within the current batch.
- * @param verts     output storage, written as a flat float array.
- */
-template <uint32_t TotalControlPoints>
-void PaPatchListSingle(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
-{
-    /// @todo Optimize this
-
-#if USE_SIMD16_FRONTEND
-    // with useAlternateOffset set, the patch index is shifted by KNOB_SIMD_WIDTH
-    if (pa.useAlternateOffset)
-    {
-        primIndex += KNOB_SIMD_WIDTH;
-    }
-
-    const uint32_t lanesPerInputVec = KNOB_SIMD16_WIDTH;
-#else
-    const uint32_t lanesPerInputVec = KNOB_SIMD_WIDTH;
-#endif
-
-    float* pDst = (float*)verts;
-
-    for (uint32_t cp = 0; cp < TotalControlPoints; ++cp)
-    {
-        // position of this control point in the stream of input vertices
-        const uint32_t srcPoint = primIndex * TotalControlPoints + cp;
-        const uint32_t srcVec   = srcPoint / lanesPerInputVec;
-        const uint32_t srcLane  = srcPoint % lanesPerInputVec;
-
-        // one float per attribute component, written consecutively
-        for (uint32_t comp = 0; comp < 4; ++comp)
-        {
-#if USE_SIMD16_FRONTEND
-            const float* pSrc = (const float*)(&PaGetSimdVector_simd16(pa, srcVec, slot)[comp]);
-#else
-            const float* pSrc = (const float*)(&PaGetSimdVector(pa, srcVec, slot)[comp]);
-#endif
-            *pDst++ = pSrc[srcLane];
-        }
-    }
-}
-
-/**
- * Intermediate patch-list state. Produces no output: it only schedules
- * PaPatchList<TotalControlPoints, CurrentControlPoints + 1> as the next state
- * (with PaPatchListSingle as the single-primitive function) and returns false.
- * slot and verts are unused here.
- */
-template <uint32_t TotalControlPoints, uint32_t CurrentControlPoints = 1>
-static bool PaPatchList(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
-{
-    SetNextPaState(pa,
-                   PaPatchList<TotalControlPoints, CurrentControlPoints + 1>,
-                   PaPatchListSingle<TotalControlPoints>);
-    return false;
-}
-
-/**
- * @brief Final patch-list state: emits KNOB_SIMD_WIDTH patches for one attribute.
- *
- * For every component and control point, gathers one float per output lane from
- * the stored input vectors and loads the result into verts[cp][component].
- * Afterwards the state machine is set back to PaPatchList<TotalControlPoints>.
- * Called once per attribute; each attribute has 4 components.
- *
- * @return always true.
- */
-template <uint32_t TotalControlPoints>
-static bool PaPatchListTerm(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
-{
-    /// @todo Optimize this
-
-#if USE_SIMD16_FRONTEND
-    // with useAlternateOffset set, source lanes start at KNOB_SIMD_WIDTH
-    const uint32_t firstLane        = pa.useAlternateOffset ? KNOB_SIMD_WIDTH : 0;
-    const uint32_t lanesPerInputVec = KNOB_SIMD16_WIDTH;
-#else
-    const uint32_t firstLane        = 0;
-    const uint32_t lanesPerInputVec = KNOB_SIMD_WIDTH;
-#endif
-
-    for (uint32_t comp = 0; comp < 4; ++comp)
-    {
-        for (uint32_t cp = 0; cp < TotalControlPoints; ++cp)
-        {
-            float gathered[KNOB_SIMD_WIDTH];
-
-            for (uint32_t lane = 0; lane < KNOB_SIMD_WIDTH; ++lane)
-            {
-                const uint32_t srcPoint = (lane + firstLane) * TotalControlPoints + cp;
-                const uint32_t srcVec   = srcPoint / lanesPerInputVec;
-                const uint32_t srcLane  = srcPoint % lanesPerInputVec;
-
-#if USE_SIMD16_FRONTEND
-                const float* pSrc =
-                    (const float*)(&PaGetSimdVector_simd16(pa, srcVec, slot)[comp]);
-#else
-                const float* pSrc = (const float*)(&PaGetSimdVector(pa, srcVec, slot)[comp]);
-#endif
-                gathered[lane] = pSrc[srcLane];
-            }
-
-            verts[cp][comp] = _simd_loadu_ps(gathered);
-        }
-    }
-
-    SetNextPaState(pa,
-                   PaPatchList<TotalControlPoints>,
-                   PaPatchListSingle<TotalControlPoints>,
-                   0,
-                   PA_STATE_OPT::SIMD_WIDTH,
-                   true);
-
-    return true;
-}
-
-#if ENABLE_AVX512_SIMD16
-/**
- * Intermediate 16-wide patch-list state. Produces no output: it schedules the
- * CurrentControlPoints + 1 instantiations of both PaPatchList_simd16 and
- * PaPatchList (with PaPatchListSingle as the single-primitive function) and
- * returns false. slot and verts are unused here.
- */
-template <uint32_t TotalControlPoints, uint32_t CurrentControlPoints = 1>
-static bool PaPatchList_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
-{
-    SetNextPaState_simd16(pa,
-                          PaPatchList_simd16<TotalControlPoints, CurrentControlPoints + 1>,
-                          PaPatchList<TotalControlPoints, CurrentControlPoints + 1>,
-                          PaPatchListSingle<TotalControlPoints>);
-    return false;
-}
-
-/**
- * @brief Final 16-wide patch-list state: emits KNOB_SIMD16_WIDTH patches for one
- *        attribute.
- *
- * We have an input of KNOB_SIMD16_WIDTH * TotalControlPoints and we output
- * KNOB_SIMD16_WIDTH * 1 patch. This function is called once per attribute.
- * Each attribute has 4 components.
- *
- * The source vector and lane are computed in units of KNOB_SIMD16_WIDTH, so the
- * input must be fetched with the 16-wide accessor PaGetSimdVector_simd16. The
- * 8-wide PaGetSimdVector only yields 8 floats per component, which cannot
- * satisfy a lane index of up to KNOB_SIMD16_WIDTH - 1.
- *
- * @return always true.
- */
-template <uint32_t TotalControlPoints>
-static bool PaPatchListTerm_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
-{
-    /// @todo Optimize this
-
-    // Loop over all components of the attribute
-    for (uint32_t i = 0; i < 4; ++i)
-    {
-        for (uint32_t cp = 0; cp < TotalControlPoints; ++cp)
-        {
-            float vec[KNOB_SIMD16_WIDTH];
-            for (uint32_t lane = 0; lane < KNOB_SIMD16_WIDTH; ++lane)
-            {
-                uint32_t input_cp   = lane * TotalControlPoints + cp;
-                uint32_t input_vec  = input_cp / KNOB_SIMD16_WIDTH;
-                uint32_t input_lane = input_cp % KNOB_SIMD16_WIDTH;
-
-                // 16-wide accessor, matching the 16-wide vector/lane arithmetic above
-                const float* pInputVec =
-                    (const float*)(&PaGetSimdVector_simd16(pa, input_vec, slot)[i]);
-                vec[lane] = pInputVec[input_lane];
-            }
-            verts[cp][i] = _simd16_loadu_ps(vec);
-        }
-    }
-
-    // Restart the state machine for the next batch of patches.
-    SetNextPaState_simd16(pa,
-                          PaPatchList_simd16<TotalControlPoints>,
-                          PaPatchList<TotalControlPoints>,
-                          PaPatchListSingle<TotalControlPoints>,
-                          0,
-                          PA_STATE_OPT::SIMD_WIDTH,
-                          true);
-
-    return true;
-}
-
-#endif
-#define PA_PATCH_LIST_TERMINATOR(N) \
- template <> \
- bool PaPatchList<N, N>(PA_STATE_OPT & pa, uint32_t slot, simdvector verts[]) \
- { \
- return PaPatchListTerm<N>(pa, slot, verts); \
- }
-PA_PATCH_LIST_TERMINATOR(1) /* Each invocation defines the explicit specialization
- * PaPatchList<N, N>, which forwards to PaPatchListTerm<N>. The generic
- * PaPatchList<N, k> only schedules PaPatchList<N, k + 1>, so the specialization
- * at k == N is what ends that chain. Patch sizes 1 through 32 are covered.
- */
-PA_PATCH_LIST_TERMINATOR(2)
-PA_PATCH_LIST_TERMINATOR(3)
-PA_PATCH_LIST_TERMINATOR(4)
-PA_PATCH_LIST_TERMINATOR(5)
-PA_PATCH_LIST_TERMINATOR(6)
-PA_PATCH_LIST_TERMINATOR(7)
-PA_PATCH_LIST_TERMINATOR(8)
-PA_PATCH_LIST_TERMINATOR(9)
-PA_PATCH_LIST_TERMINATOR(10)
-PA_PATCH_LIST_TERMINATOR(11)
-PA_PATCH_LIST_TERMINATOR(12)
-PA_PATCH_LIST_TERMINATOR(13)
-PA_PATCH_LIST_TERMINATOR(14)
-PA_PATCH_LIST_TERMINATOR(15)
-PA_PATCH_LIST_TERMINATOR(16)
-PA_PATCH_LIST_TERMINATOR(17)
-PA_PATCH_LIST_TERMINATOR(18)
-PA_PATCH_LIST_TERMINATOR(19)
-PA_PATCH_LIST_TERMINATOR(20)
-PA_PATCH_LIST_TERMINATOR(21)
-PA_PATCH_LIST_TERMINATOR(22)
-PA_PATCH_LIST_TERMINATOR(23)
-PA_PATCH_LIST_TERMINATOR(24)
-PA_PATCH_LIST_TERMINATOR(25)
-PA_PATCH_LIST_TERMINATOR(26)
-PA_PATCH_LIST_TERMINATOR(27)
-PA_PATCH_LIST_TERMINATOR(28)
-PA_PATCH_LIST_TERMINATOR(29)
-PA_PATCH_LIST_TERMINATOR(30)
-PA_PATCH_LIST_TERMINATOR(31)
-PA_PATCH_LIST_TERMINATOR(32)
-#undef PA_PATCH_LIST_TERMINATOR
-
-#if ENABLE_AVX512_SIMD16
-#define PA_PATCH_LIST_TERMINATOR_SIMD16(N) \
- template <> \
- bool PaPatchList_simd16<N, N>(PA_STATE_OPT & pa, uint32_t slot, simd16vector verts[]) \
- { \
- return PaPatchListTerm_simd16<N>(pa, slot, verts); \
- }
-PA_PATCH_LIST_TERMINATOR_SIMD16(1) /* Each invocation defines the explicit specialization
- * PaPatchList_simd16<N, N>, which forwards to PaPatchListTerm_simd16<N>. The
- * generic PaPatchList_simd16<N, k> only schedules the k + 1 instantiation, so
- * the specialization at k == N is what ends that chain. Patch sizes 1 through
- * 32 are covered.
- */
-PA_PATCH_LIST_TERMINATOR_SIMD16(2)
-PA_PATCH_LIST_TERMINATOR_SIMD16(3)
-PA_PATCH_LIST_TERMINATOR_SIMD16(4)
-PA_PATCH_LIST_TERMINATOR_SIMD16(5)
-PA_PATCH_LIST_TERMINATOR_SIMD16(6)
-PA_PATCH_LIST_TERMINATOR_SIMD16(7)
-PA_PATCH_LIST_TERMINATOR_SIMD16(8)
-PA_PATCH_LIST_TERMINATOR_SIMD16(9)
-PA_PATCH_LIST_TERMINATOR_SIMD16(10)
-PA_PATCH_LIST_TERMINATOR_SIMD16(11)
-PA_PATCH_LIST_TERMINATOR_SIMD16(12)
-PA_PATCH_LIST_TERMINATOR_SIMD16(13)
-PA_PATCH_LIST_TERMINATOR_SIMD16(14)
-PA_PATCH_LIST_TERMINATOR_SIMD16(15)
-PA_PATCH_LIST_TERMINATOR_SIMD16(16)
-PA_PATCH_LIST_TERMINATOR_SIMD16(17)
-PA_PATCH_LIST_TERMINATOR_SIMD16(18)
-PA_PATCH_LIST_TERMINATOR_SIMD16(19)
-PA_PATCH_LIST_TERMINATOR_SIMD16(20)
-PA_PATCH_LIST_TERMINATOR_SIMD16(21)
-PA_PATCH_LIST_TERMINATOR_SIMD16(22)
-PA_PATCH_LIST_TERMINATOR_SIMD16(23)
-PA_PATCH_LIST_TERMINATOR_SIMD16(24)
-PA_PATCH_LIST_TERMINATOR_SIMD16(25)
-PA_PATCH_LIST_TERMINATOR_SIMD16(26)
-PA_PATCH_LIST_TERMINATOR_SIMD16(27)
-PA_PATCH_LIST_TERMINATOR_SIMD16(28)
-PA_PATCH_LIST_TERMINATOR_SIMD16(29)
-PA_PATCH_LIST_TERMINATOR_SIMD16(30)
-PA_PATCH_LIST_TERMINATOR_SIMD16(31)
-PA_PATCH_LIST_TERMINATOR_SIMD16(32)
-#undef PA_PATCH_LIST_TERMINATOR_SIMD16
-
-#endif
-/**
- * Triangle-list state 0. Emits nothing: it only schedules PaTriList1 as the
- * next state (PaTriListSingle0 handles single primitives) and returns false.
- * slot and verts are unused here.
- */
-bool PaTriList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
-{
-    // Not enough vertices to assemble 4 or 8 triangles.
-    SetNextPaState(pa, PaTriList1, PaTriListSingle0);
-
-    return false;
-}
-
-/**
- * Triangle-list state 1. Emits nothing: it only schedules PaTriList2 as the
- * next state (PaTriListSingle0 handles single primitives) and returns false.
- * slot and verts are unused here.
- */
-bool PaTriList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
-{
-    // Not enough vertices to assemble 8 triangles.
-    SetNextPaState(pa, PaTriList2, PaTriListSingle0);
-
-    return false;
-}
-
-bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
-{ /* Triangle-list state 2: emits one 8-wide batch of triangles for attribute `slot`.
-   * Three 8-wide inputs a, b and c hold 24 consecutive vertices; verts[0], verts[1]
-   * and verts[2] receive vertex 0, 1 and 2 of each triangle (x, y, z, w each).
-   * Afterwards the state machine is set back to PaTriList0. Always returns true.
-   */
-#if KNOB_ARCH == KNOB_ARCH_AVX
-#if USE_SIMD16_FRONTEND
- simdvector a;
- simdvector b;
- simdvector c;
-
- if (!pa.useAlternateOffset)
- { // a, b, c = stored vector 0 (half 0, half 1) and stored vector 1 (half 0)
- const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(a_16[i], 0);
- b[i] = _simd16_extract_ps(a_16[i], 1);
- c[i] = _simd16_extract_ps(b_16[i], 0);
- }
- }
- else
- { // a, b, c = stored vector 1 (half 1) and stored vector 2 (half 0, half 1)
- const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
- const simd16vector& c_16 = PaGetSimdVector_simd16(pa, 2, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(b_16[i], 1);
- b[i] = _simd16_extract_ps(c_16[i], 0);
- c[i] = _simd16_extract_ps(c_16[i], 1);
- }
- }
-
-#else
- simdvector& a = PaGetSimdVector(pa, 0, slot);
- simdvector& b = PaGetSimdVector(pa, 1, slot);
- simdvector& c = PaGetSimdVector(pa, 2, slot);
-
-#endif
- simdscalar s;
-
- // Tri Pattern - provoking vertex is always v0
- // v0 -> 0 3 6 9 12 15 18 21
- // v1 -> 1 4 7 10 13 16 19 22
- // v2 -> 2 5 8 11 14 17 20 23
-
- for (int i = 0; i < 4; ++i) // i walks the x, y, z, w components
- { // per output: blend a/b/c, permute within 128-bit halves, then blend with the half-swapped copy
- simdvector& v0 = verts[0];
- v0[i] = _simd_blend_ps(a[i], b[i], 0x92);
- v0[i] = _simd_blend_ps(v0[i], c[i], 0x24);
- v0[i] = _simd_permute_ps_i(v0[i], 0x6C);
- s = _simd_permute2f128_ps(v0[i], v0[i], 0x21);
- v0[i] = _simd_blend_ps(v0[i], s, 0x44);
-
- simdvector& v1 = verts[1];
- v1[i] = _simd_blend_ps(a[i], b[i], 0x24);
- v1[i] = _simd_blend_ps(v1[i], c[i], 0x49);
- v1[i] = _simd_permute_ps_i(v1[i], 0xB1);
- s = _simd_permute2f128_ps(v1[i], v1[i], 0x21);
- v1[i] = _simd_blend_ps(v1[i], s, 0x66);
-
- simdvector& v2 = verts[2];
- v2[i] = _simd_blend_ps(a[i], b[i], 0x49);
- v2[i] = _simd_blend_ps(v2[i], c[i], 0x92);
- v2[i] = _simd_permute_ps_i(v2[i], 0xC6);
- s = _simd_permute2f128_ps(v2[i], v2[i], 0x21);
- v2[i] = _simd_blend_ps(v2[i], s, 0x22);
- }
-
-#elif KNOB_ARCH >= KNOB_ARCH_AVX2
- const simdscalari perm0 = _simd_set_epi32(5, 2, 7, 4, 1, 6, 3, 0); // arguments listed from lane 7 down to lane 0
- const simdscalari perm1 = _simd_set_epi32(6, 3, 0, 5, 2, 7, 4, 1);
- const simdscalari perm2 = _simd_set_epi32(7, 4, 1, 6, 3, 0, 5, 2);
-
-#if USE_SIMD16_FRONTEND
- simdvector a;
- simdvector b;
- simdvector c;
-
- if (!pa.useAlternateOffset)
- { // a, b, c = stored vector 0 (half 0, half 1) and stored vector 1 (half 0)
- const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(a_16[i], 0);
- b[i] = _simd16_extract_ps(a_16[i], 1);
- c[i] = _simd16_extract_ps(b_16[i], 0);
- }
- }
- else
- { // a, b, c = stored vector 1 (half 1) and stored vector 2 (half 0, half 1)
- const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
- const simd16vector& c_16 = PaGetSimdVector_simd16(pa, 2, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(b_16[i], 1);
- b[i] = _simd16_extract_ps(c_16[i], 0);
- c[i] = _simd16_extract_ps(c_16[i], 1);
- }
- }
-
-#else
- const simdvector& a = PaGetSimdVector(pa, 0, slot);
- const simdvector& b = PaGetSimdVector(pa, 1, slot);
- const simdvector& c = PaGetSimdVector(pa, 2, slot);
-
-#endif
- // v0 -> a0 a3 a6 b1 b4 b7 c2 c5
- // v1 -> a1 a4 a7 b2 b5 c0 c3 c6
- // v2 -> a2 a5 b0 b3 b6 c1 c4 c7
-
- simdvector& v0 = verts[0];
- simdvector& v1 = verts[1];
- simdvector& v2 = verts[2];
-
- // for simd x, y, z, and w
- for (int i = 0; i < 4; ++i)
- {
- simdscalar temp0 = _simd_blend_ps(_simd_blend_ps(a[i], b[i], 0x92), c[i], 0x24); // a0 b1 c2 a3 b4 c5 a6 b7
- simdscalar temp1 = _simd_blend_ps(_simd_blend_ps(a[i], b[i], 0x24), c[i], 0x49); // c0 a1 b2 c3 a4 b5 c6 a7
- simdscalar temp2 = _simd_blend_ps(_simd_blend_ps(a[i], b[i], 0x49), c[i], 0x92); // b0 c1 a2 b3 c4 a5 b6 c7
-
- v0[i] = _simd_permute_ps(temp0, perm0); // reorder into the v0 pattern listed above
- v1[i] = _simd_permute_ps(temp1, perm1);
- v2[i] = _simd_permute_ps(temp2, perm2);
- }
-
-#endif
- SetNextPaState(pa, PaTriList0, PaTriListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true); // restart at state 0; NOTE(review): meaning of the trailing arguments is defined by SetNextPaState, not visible here
- return true;
-}
-
-#if ENABLE_AVX512_SIMD16
-/**
- * 16-wide triangle-list state 0. Emits nothing: it schedules PaTriList1_simd16
- * and PaTriList1 as the next states (PaTriListSingle0 handles single
- * primitives) and returns false. slot and verts are unused here.
- */
-bool PaTriList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
-{
-    // Not enough vertices to assemble 16 triangles
-    SetNextPaState_simd16(pa, PaTriList1_simd16, PaTriList1, PaTriListSingle0);
-
-    return false;
-}
-
-/**
- * 16-wide triangle-list state 1. Emits nothing: it schedules PaTriList2_simd16
- * and PaTriList2 as the next states (PaTriListSingle0 handles single
- * primitives) and returns false. slot and verts are unused here.
- */
-bool PaTriList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
-{
-    // Not enough vertices to assemble 16 triangles
-    SetNextPaState_simd16(pa, PaTriList2_simd16, PaTriList2, PaTriListSingle0);
-
-    return false;
-}
-
-bool PaTriList2_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
-{ /* 16-wide triangle-list state 2: emits one 16-wide batch of triangles for attribute
-   * `slot`. Stored vectors 0, 1 and 2 hold 48 consecutive vertices; verts[0], verts[1]
-   * and verts[2] receive vertex 0, 1 and 2 of each triangle (x, y, z, w each).
-   * Afterwards the state machine is set back to state 0. Always returns true.
-   * Bit patterns in the comments below are written lowest lane / lowest field first.
-   */
- // clang-format off
-
-#if KNOB_ARCH >= KNOB_ARCH_AVX2
- const simd16scalari perm0 = _simd16_set_epi32(13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3, 0);
- const simd16scalari perm1 = _simd16_set_epi32(14, 11, 8, 5, 2, 15, 12, 9, 6, 3, 0, 13, 10, 7, 4, 1);
- const simd16scalari perm2 = _simd16_set_epi32(15, 12, 9, 6, 3, 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2);
-#else // KNOB_ARCH == KNOB_ARCH_AVX
- simd16scalar perm0 = _simd16_setzero_ps(); // on AVX the perm variables are scratch registers, not index vectors
- simd16scalar perm1 = _simd16_setzero_ps();
- simd16scalar perm2 = _simd16_setzero_ps();
-#endif
-
- const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
- const simd16vector& c = PaGetSimdVector_simd16(pa, 2, slot);
-
- const simd16mask mask0 = 0x4924; // lanes 2, 5, 8, 11, 14
- const simd16mask mask1 = 0x2492; // lanes 1, 4, 7, 10, 13
- const simd16mask mask2 = 0x9249; // lanes 0, 3, 6, 9, 12, 15
-
- // v0 -> a0 a3 a6 a9 aC aF b2 b5 b8 bB bE c1 c4 c7 cA cD
- // v1 -> a1 a4 a7 aA aD b0 b3 b6 b9 bC bF c2 c5 c8 cB cE
- // v2 -> a2 a5 a8 aB aE b1 b4 b7 bA bD c0 c3 c6 c9 cC cF
-
- simd16vector& v0 = verts[0];
- simd16vector& v1 = verts[1];
- simd16vector& v2 = verts[2];
-
- // for simd16 x, y, z, and w
- for (int i = 0; i < 4; i += 1)
- {
- simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
- simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
- simd16scalar tempc = _simd16_loadu_ps(reinterpret_cast<const float*>(&c[i]));
-
- simd16scalar temp0 = _simd16_blend_ps(_simd16_blend_ps(tempa, tempb, mask0), tempc, mask1);
- simd16scalar temp1 = _simd16_blend_ps(_simd16_blend_ps(tempa, tempb, mask2), tempc, mask0);
- simd16scalar temp2 = _simd16_blend_ps(_simd16_blend_ps(tempa, tempb, mask1), tempc, mask2);
-
-#if KNOB_ARCH >= KNOB_ARCH_AVX2
- v0[i] = _simd16_permute_ps(temp0, perm0);
- v1[i] = _simd16_permute_ps(temp1, perm1);
- v2[i] = _simd16_permute_ps(temp2, perm2);
-#else // #if KNOB_ARCH == KNOB_ARCH_AVX
-
- // the general permutes (above) are prohibitively slow to emulate on AVX (its scalar code)
-
- temp0 = _simd16_permute_ps_i(temp0, 0x6C); // (0, 3, 2, 1) => 00 11 10 01 => 0x6C
- perm0 = _simd16_permute2f128_ps(temp0, temp0, 0xB1); // (1, 0, 3, 2) => 01 00 11 10 => 0xB1
- temp0 = _simd16_blend_ps(temp0, perm0, 0x4444); // 0010 0010 0010 0010
- perm0 = _simd16_permute2f128_ps(temp0, temp0, 0x4E); // (2, 3, 0, 1) => 10 11 00 01 => 0x4E
- v0[i] = _simd16_blend_ps(temp0, perm0, 0x3838); // 0001 1100 0001 1100
-
- temp1 = _simd16_permute_ps_i(temp1, 0xB1); // (1, 0, 3, 2) => 01 00 11 10 => 0xB1
- perm1 = _simd16_permute2f128_ps(temp1, temp1, 0xB1); // (1, 0, 3, 2) => 01 00 11 10 => 0xB1
- temp1 = _simd16_blend_ps(temp1, perm1, 0x6666); // 0110 0110 0110 0110
- perm1 = _simd16_permute2f128_ps(temp1, temp1, 0x4E); // (2, 3, 0, 1) => 10 11 00 01 => 0x4E
- v1[i] = _simd16_blend_ps(temp1, perm1, 0x1818); // 0001 1000 0001 1000
-
- temp2 = _simd16_permute_ps_i(temp2, 0xC6); // (2, 1, 0, 3) => 10 01 00 11 => 0xC6
- perm2 = _simd16_permute2f128_ps(temp2, temp2, 0xB1); // (1, 0, 3, 2) => 01 00 11 10 => 0xB1
- temp2 = _simd16_blend_ps(temp2, perm2, 0x2222); // 0100 0100 0100 0100
- perm2 = _simd16_permute2f128_ps(temp2, temp2, 0x4E); // (2, 3, 0, 1) => 10 11 00 01 => 0x4E
- v2[i] = _simd16_blend_ps(temp2, perm2, 0x1C1C); // 0011 1000 0011 1000
-#endif
- }
-
- SetNextPaState_simd16(pa, PaTriList0_simd16, PaTriList0, PaTriListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
- return true;
-
- // clang-format on
-}
-
-#endif
-void PaTriListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]) // Writes the three vertices of tri-list triangle `primIndex` to verts[0..2] in horizontal form.
-{
-#if USE_SIMD16_FRONTEND
- const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
- const simd16vector& c = PaGetSimdVector_simd16(pa, 2, slot);
-
- if (pa.useAlternateOffset)
- {
- primIndex += KNOB_SIMD_WIDTH; // shifts the index into the upper cases of the switch below
- }
-
- // v0 -> a0 a3 a6 a9 aC aF b2 b5 b8 bB bE c1 c4 c7 cA cD
- // v1 -> a1 a4 a7 aA aD b0 b3 b6 b9 bC bF c2 c5 c8 cB cE
- // v2 -> a2 a5 a8 aB aE b1 b4 b7 bA bD c0 c3 c6 c9 cC cF
-
- switch (primIndex) // no default: an index outside 0..15 leaves verts untouched
- {
- case 0:
- verts[0] = swizzleLane0(a);
- verts[1] = swizzleLane1(a);
- verts[2] = swizzleLane2(a);
- break;
- case 1:
- verts[0] = swizzleLane3(a);
- verts[1] = swizzleLane4(a);
- verts[2] = swizzleLane5(a);
- break;
- case 2:
- verts[0] = swizzleLane6(a);
- verts[1] = swizzleLane7(a);
- verts[2] = swizzleLane8(a);
- break;
- case 3:
- verts[0] = swizzleLane9(a);
- verts[1] = swizzleLaneA(a);
- verts[2] = swizzleLaneB(a);
- break;
- case 4:
- verts[0] = swizzleLaneC(a);
- verts[1] = swizzleLaneD(a);
- verts[2] = swizzleLaneE(a);
- break;
- case 5:
- verts[0] = swizzleLaneF(a);
- verts[1] = swizzleLane0(b);
- verts[2] = swizzleLane1(b);
- break;
- case 6:
- verts[0] = swizzleLane2(b);
- verts[1] = swizzleLane3(b);
- verts[2] = swizzleLane4(b);
- break;
- case 7:
- verts[0] = swizzleLane5(b);
- verts[1] = swizzleLane6(b);
- verts[2] = swizzleLane7(b);
- break;
- case 8:
- verts[0] = swizzleLane8(b);
- verts[1] = swizzleLane9(b);
- verts[2] = swizzleLaneA(b);
- break;
- case 9:
- verts[0] = swizzleLaneB(b);
- verts[1] = swizzleLaneC(b);
- verts[2] = swizzleLaneD(b);
- break;
- case 10:
- verts[0] = swizzleLaneE(b);
- verts[1] = swizzleLaneF(b);
- verts[2] = swizzleLane0(c);
- break;
- case 11:
- verts[0] = swizzleLane1(c);
- verts[1] = swizzleLane2(c);
- verts[2] = swizzleLane3(c);
- break;
- case 12:
- verts[0] = swizzleLane4(c);
- verts[1] = swizzleLane5(c);
- verts[2] = swizzleLane6(c);
- break;
- case 13:
- verts[0] = swizzleLane7(c);
- verts[1] = swizzleLane8(c);
- verts[2] = swizzleLane9(c);
- break;
- case 14:
- verts[0] = swizzleLaneA(c);
- verts[1] = swizzleLaneB(c);
- verts[2] = swizzleLaneC(c);
- break;
- case 15:
- verts[0] = swizzleLaneD(c);
- verts[1] = swizzleLaneE(c);
- verts[2] = swizzleLaneF(c);
- break;
- };
-#else
- // We have 12 simdscalars contained within 3 simdvectors which
- // hold at least 8 triangles worth of data. We want to assemble a single
- // triangle with data in horizontal form.
-
- const simdvector& a = PaGetSimdVector(pa, 0, slot);
- const simdvector& b = PaGetSimdVector(pa, 1, slot);
- const simdvector& c = PaGetSimdVector(pa, 2, slot);
-
- // Convert from vertical to horizontal.
- // Tri Pattern - provoking vertex is always v0
- // v0 -> 0 3 6 9 12 15 18 21
- // v1 -> 1 4 7 10 13 16 19 22
- // v2 -> 2 5 8 11 14 17 20 23
-
- switch (primIndex) // no default: an index outside 0..7 leaves verts untouched
- {
- case 0:
- verts[0] = swizzleLane0(a);
- verts[1] = swizzleLane1(a);
- verts[2] = swizzleLane2(a);
- break;
- case 1:
- verts[0] = swizzleLane3(a);
- verts[1] = swizzleLane4(a);
- verts[2] = swizzleLane5(a);
- break;
- case 2:
- verts[0] = swizzleLane6(a);
- verts[1] = swizzleLane7(a);
- verts[2] = swizzleLane0(b);
- break;
- case 3:
- verts[0] = swizzleLane1(b);
- verts[1] = swizzleLane2(b);
- verts[2] = swizzleLane3(b);
- break;
- case 4:
- verts[0] = swizzleLane4(b);
- verts[1] = swizzleLane5(b);
- verts[2] = swizzleLane6(b);
- break;
- case 5:
- verts[0] = swizzleLane7(b);
- verts[1] = swizzleLane0(c);
- verts[2] = swizzleLane1(c);
- break;
- case 6:
- verts[0] = swizzleLane2(c);
- verts[1] = swizzleLane3(c);
- verts[2] = swizzleLane4(c);
- break;
- case 7:
- verts[0] = swizzleLane5(c);
- verts[1] = swizzleLane6(c);
- verts[2] = swizzleLane7(c);
- break;
- };
-#endif
-}
-
-bool PaTriStrip0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) // First tri-strip step: produces no output, only updates the PA state.
-{
- SetNextPaState(pa, PaTriStrip1, PaTriStripSingle0); // hands the follow-up handlers to the PA; slot and verts are not used here
- return false; // Not enough vertices to assemble 8 triangles.
-}
-
-bool PaTriStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) // Assembles 8 tri-strip triangles into verts[0..2] (vertical form) and returns true.
-{
-#if USE_SIMD16_FRONTEND
- simdvector a;
- simdvector b;
-
- if (!pa.useAlternateOffset)
- {
- const simd16vector& a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(a_16[i], 0); // half 0 of the prev vector (presumably the lower 8 lanes)
- b[i] = _simd16_extract_ps(a_16[i], 1); // half 1 of the prev vector
- }
- }
- else
- {
- const simd16vector& b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(b_16[i], 0); // both halves come from the cur vector on the alternate pass
- b[i] = _simd16_extract_ps(b_16[i], 1);
- }
- }
-
-#else
- simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
- simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
-
-#endif
- simdscalar s;
-
- for (int i = 0; i < 4; ++i) // one iteration per component (4 per vertex)
- {
- simdscalar a0 = a[i];
- simdscalar b0 = b[i];
-
- // Tri Pattern - provoking vertex is always v0
- // v0 -> 01234567
- // v1 -> 13355779
- // v2 -> 22446688
- simdvector& v0 = verts[0];
- v0[i] = a0;
-
- // s -> 4567891011
- s = _simd_permute2f128_ps(a0, b0, 0x21);
- // s -> 23456789
- s = _simd_shuffle_ps(a0, s, _MM_SHUFFLE(1, 0, 3, 2));
-
- simdvector& v1 = verts[1];
- // v1 -> 13355779
- v1[i] = _simd_shuffle_ps(a0, s, _MM_SHUFFLE(3, 1, 3, 1));
-
- simdvector& v2 = verts[2];
- // v2 -> 22446688
- v2[i] = _simd_shuffle_ps(a0, s, _MM_SHUFFLE(2, 2, 2, 2));
- }
-
- SetNextPaState(pa, PaTriStrip1, PaTriStripSingle0, 0, PA_STATE_OPT::SIMD_WIDTH); // same handlers again: the strip keeps using this function
- return true;
-}
-
-#if ENABLE_AVX512_SIMD16
-bool PaTriStrip0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) // First simd16 tri-strip step: produces no output, only updates the PA state.
-{
- SetNextPaState_simd16(pa, PaTriStrip1_simd16, PaTriStrip1, PaTriStripSingle0); // hands the follow-up handlers to the PA; slot and verts are not used here
- return false; // Not enough vertices to assemble 16 triangles.
-}
-
-bool PaTriStrip1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) // Assembles 16 tri-strip triangles from the prev and cur vectors into verts[0..2] and returns true.
-{
- // clang-format off
-
- const simd16vector& a = PaGetSimdVector_simd16(pa, pa.prev, slot);
- const simd16vector& b = PaGetSimdVector_simd16(pa, pa.cur, slot);
-
- const simd16mask mask0 = 0xF000; // bits 12..15, i.e. the top four lanes
-
- // v0 -> a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
- // v1 -> a1 a3 a3 a5 a5 a7 a7 a9 a9 aB aB aD aD aF aF b1
- // v2 -> a2 a2 a4 a4 a6 a6 a8 a8 aA aA aC aC aE aE b0 b0
-
- simd16vector& v0 = verts[0];
- simd16vector& v1 = verts[1];
- simd16vector& v2 = verts[2];
-
- // for simd16 x, y, z, and w
- for (int i = 0; i < 4; i += 1)
- {
- simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
- simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
-
- simd16scalar perm0 = _simd16_permute2f128_ps(tempa, tempa, 0x39); // (0 3 2 1) = 00 11 10 01 // a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF a0 a1 a2 a3
- simd16scalar perm1 = _simd16_permute2f128_ps(tempb, tempb, 0x39); // (0 3 2 1) = 00 11 10 01 // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF b0 b1 b2 b3
-
- simd16scalar blend = _simd16_blend_ps(perm0, perm1, mask0); // a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0 b1 b2 b3
- simd16scalar shuff = _simd16_shuffle_ps(tempa, blend, _MM_SHUFFLE(1, 0, 3, 2)); // a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0 b1
-
- v0[i] = tempa; // a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
- v1[i] = _simd16_shuffle_ps(tempa, shuff, _MM_SHUFFLE(3, 1, 3, 1)); // a1 a3 a3 a5 a5 a7 a7 a9 a9 aB aB aD aD aF aF b1
- v2[i] = _simd16_shuffle_ps(tempa, shuff, _MM_SHUFFLE(2, 2, 2, 2)); // a2 a2 a4 a4 a6 a6 a8 a8 aA aA aC aC aE aE b0 b0
- }
-
- SetNextPaState_simd16(pa, PaTriStrip1_simd16, PaTriStrip1, PaTriStripSingle0, 0, PA_STATE_OPT::SIMD_WIDTH); // same handlers again: the strip keeps using this function
- return true;
-
- // clang-format on
-}
-
-#endif
-void PaTriStripSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]) // Writes the three vertices of tri-strip triangle `primIndex` to verts[0..2] in horizontal form.
-{
-#if USE_SIMD16_FRONTEND
- const simd16vector& a = PaGetSimdVector_simd16(pa, pa.prev, slot);
- const simd16vector& b = PaGetSimdVector_simd16(pa, pa.cur, slot);
-
- if (pa.useAlternateOffset)
- {
- primIndex += KNOB_SIMD_WIDTH; // shifts the index into the upper cases of the switch below
- }
-
- // v0 -> a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
- // v1 -> a1 a3 a3 a5 a5 a7 a7 a9 a9 aB aB aD aD aF aF b1
- // v2 -> a2 a2 a4 a4 a6 a6 a8 a8 aA aA aC aC aE aE b0 b0
-
- switch (primIndex) // odd cases swap the last two vertices relative to strip order; no default, so other indices leave verts untouched
- {
- case 0:
- verts[0] = swizzleLane0(a);
- verts[1] = swizzleLane1(a);
- verts[2] = swizzleLane2(a);
- break;
- case 1:
- verts[0] = swizzleLane1(a);
- verts[1] = swizzleLane3(a);
- verts[2] = swizzleLane2(a);
- break;
- case 2:
- verts[0] = swizzleLane2(a);
- verts[1] = swizzleLane3(a);
- verts[2] = swizzleLane4(a);
- break;
- case 3:
- verts[0] = swizzleLane3(a);
- verts[1] = swizzleLane5(a);
- verts[2] = swizzleLane4(a);
- break;
- case 4:
- verts[0] = swizzleLane4(a);
- verts[1] = swizzleLane5(a);
- verts[2] = swizzleLane6(a);
- break;
- case 5:
- verts[0] = swizzleLane5(a);
- verts[1] = swizzleLane7(a);
- verts[2] = swizzleLane6(a);
- break;
- case 6:
- verts[0] = swizzleLane6(a);
- verts[1] = swizzleLane7(a);
- verts[2] = swizzleLane8(a);
- break;
- case 7:
- verts[0] = swizzleLane7(a);
- verts[1] = swizzleLane9(a);
- verts[2] = swizzleLane8(a);
- break;
- case 8:
- verts[0] = swizzleLane8(a);
- verts[1] = swizzleLane9(a);
- verts[2] = swizzleLaneA(a);
- break;
- case 9:
- verts[0] = swizzleLane9(a);
- verts[1] = swizzleLaneB(a);
- verts[2] = swizzleLaneA(a);
- break;
- case 10:
- verts[0] = swizzleLaneA(a);
- verts[1] = swizzleLaneB(a);
- verts[2] = swizzleLaneC(a);
- break;
- case 11:
- verts[0] = swizzleLaneB(a);
- verts[1] = swizzleLaneD(a);
- verts[2] = swizzleLaneC(a);
- break;
- case 12:
- verts[0] = swizzleLaneC(a);
- verts[1] = swizzleLaneD(a);
- verts[2] = swizzleLaneE(a);
- break;
- case 13:
- verts[0] = swizzleLaneD(a);
- verts[1] = swizzleLaneF(a);
- verts[2] = swizzleLaneE(a);
- break;
- case 14:
- verts[0] = swizzleLaneE(a);
- verts[1] = swizzleLaneF(a);
- verts[2] = swizzleLane0(b);
- break;
- case 15:
- verts[0] = swizzleLaneF(a);
- verts[1] = swizzleLane1(b);
- verts[2] = swizzleLane0(b);
- break;
- };
-#else
- const simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
- const simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
-
- // Convert from vertical to horizontal.
- // Tri Pattern - provoking vertex is always v0
- // v0 -> 01234567
- // v1 -> 13355779
- // v2 -> 22446688
-
- switch (primIndex) // no default: an index outside 0..7 leaves verts untouched
- {
- case 0:
- verts[0] = swizzleLane0(a);
- verts[1] = swizzleLane1(a);
- verts[2] = swizzleLane2(a);
- break;
- case 1:
- verts[0] = swizzleLane1(a);
- verts[1] = swizzleLane3(a);
- verts[2] = swizzleLane2(a);
- break;
- case 2:
- verts[0] = swizzleLane2(a);
- verts[1] = swizzleLane3(a);
- verts[2] = swizzleLane4(a);
- break;
- case 3:
- verts[0] = swizzleLane3(a);
- verts[1] = swizzleLane5(a);
- verts[2] = swizzleLane4(a);
- break;
- case 4:
- verts[0] = swizzleLane4(a);
- verts[1] = swizzleLane5(a);
- verts[2] = swizzleLane6(a);
- break;
- case 5:
- verts[0] = swizzleLane5(a);
- verts[1] = swizzleLane7(a);
- verts[2] = swizzleLane6(a);
- break;
- case 6:
- verts[0] = swizzleLane6(a);
- verts[1] = swizzleLane7(a);
- verts[2] = swizzleLane0(b);
- break;
- case 7:
- verts[0] = swizzleLane7(a);
- verts[1] = swizzleLane1(b);
- verts[2] = swizzleLane0(b);
- break;
- };
-#endif
-}
-
-bool PaTriFan0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) // First tri-fan step: produces no output, only updates the PA state.
-{
- SetNextPaState(pa, PaTriFan1, PaTriFanSingle0); // hands the follow-up handlers to the PA; slot and verts are not used here
- return false; // Not enough vertices to assemble 8 triangles.
-}
-
-bool PaTriFan1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) // Assembles 8 tri-fan triangles into verts[0..2] (vertical form) and returns true.
-{
-#if USE_SIMD16_FRONTEND
- simdvector leadVert;
- simdvector a;
- simdvector b;
-
- const simd16vector& leadvert_16 = PaGetSimdVector_simd16(pa, pa.first, slot);
-
- if (!pa.useAlternateOffset)
- {
- const simd16vector& a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- leadVert[i] = _simd16_extract_ps(leadvert_16[i], 0); // the anchor always comes from half 0 of the first vector
-
- a[i] = _simd16_extract_ps(a_16[i], 0);
- b[i] = _simd16_extract_ps(a_16[i], 1);
- }
- }
- else
- {
- const simd16vector& b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- leadVert[i] = _simd16_extract_ps(leadvert_16[i], 0);
-
- a[i] = _simd16_extract_ps(b_16[i], 0); // both halves come from the cur vector on the alternate pass
- b[i] = _simd16_extract_ps(b_16[i], 1);
- }
- }
-
-#else
- const simdvector& leadVert = PaGetSimdVector(pa, pa.first, slot);
- const simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
- const simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
-
-#endif
- simdscalar s;
-
- // need to fill vectors 1/2 with new verts, and v0 with anchor vert.
- for (int i = 0; i < 4; ++i)
- {
- simdscalar a0 = a[i];
- simdscalar b0 = b[i];
-
- simdscalar comp = leadVert[i];
-
- simdvector& v0 = verts[0];
- v0[i] = _simd_shuffle_ps(comp, comp, _MM_SHUFFLE(0, 0, 0, 0)); // replicate element 0 within each 128-bit half
- v0[i] = _simd_permute2f128_ps(v0[i], comp, 0x00); // copy the low half into both halves, so every lane holds the anchor
-
- simdvector& v2 = verts[2];
- s = _simd_permute2f128_ps(a0, b0, 0x21); // a4 a5 a6 a7 b0 b1 b2 b3
- v2[i] = _simd_shuffle_ps(a0, s, _MM_SHUFFLE(1, 0, 3, 2)); // a2 a3 a4 a5 a6 a7 b0 b1
-
- simdvector& v1 = verts[1];
- v1[i] = _simd_shuffle_ps(a0, v2[i], _MM_SHUFFLE(2, 1, 2, 1)); // a1 a2 a3 a4 a5 a6 a7 b0
- }
-
- SetNextPaState(pa, PaTriFan1, PaTriFanSingle0, 0, PA_STATE_OPT::SIMD_WIDTH); // same handlers again: the fan keeps using this function
- return true;
-}
-
-#if ENABLE_AVX512_SIMD16
-bool PaTriFan0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) // First simd16 tri-fan step: produces no output, only updates the PA state.
-{
- SetNextPaState_simd16(pa, PaTriFan1_simd16, PaTriFan1, PaTriFanSingle0); // hands the follow-up handlers to the PA; slot and verts are not used here
- return false; // Not enough vertices to assemble 16 triangles.
-}
-
-bool PaTriFan1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) // Assembles 16 tri-fan triangles from the first, prev and cur vectors into verts[0..2] and returns true.
-{
- // clang-format off
-
- const simd16vector& a = PaGetSimdVector_simd16(pa, pa.first, slot);
- const simd16vector& b = PaGetSimdVector_simd16(pa, pa.prev, slot);
- const simd16vector& c = PaGetSimdVector_simd16(pa, pa.cur, slot);
-
- const simd16mask mask0 = 0xF000; // bits 12..15, i.e. the top four lanes
-
- // v0 -> a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0
- // v1 -> b1 b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0
- // v2 -> b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
-
- simd16vector& v0 = verts[0];
- simd16vector& v1 = verts[1];
- simd16vector& v2 = verts[2];
-
- // for simd16 x, y, z, and w
- for (uint32_t i = 0; i < 4; i += 1)
- {
- simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
- simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
- simd16scalar tempc = _simd16_loadu_ps(reinterpret_cast<const float*>(&c[i]));
-
- simd16scalar shuff = _simd16_shuffle_ps(tempa, tempa, _MM_SHUFFLE(0, 0, 0, 0)); // a0 a0 a0 a0 a4 a4 a4 a4 a8 a8 a8 a8 aC aC aC aC (assuming the per-128-bit-lane shuffle documented for the other shuffles in this file)
-
- v0[i] = _simd16_permute2f128_ps(shuff, shuff, 0x00); // a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0
-
- simd16scalar temp0 = _simd16_permute2f128_ps(tempb, tempb, 0x39); // (0 3 2 1) = 00 11 10 01 // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF b0 b1 b2 b3
- simd16scalar temp1 = _simd16_permute2f128_ps(tempc, tempc, 0x39); // (0 3 2 1) = 00 11 10 01 // c4 c5 c6 c7 c8 c9 cA cB cC cD cE cF c0 c1 c2 c3
-
- simd16scalar blend = _simd16_blend_ps(temp0, temp1, mask0); // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1 c2 c3
-
- simd16scalar temp2 = _simd16_shuffle_ps(tempb, blend, _MM_SHUFFLE(1, 0, 3, 2)); // b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
-
- v1[i] = _simd16_shuffle_ps(tempb, temp2, _MM_SHUFFLE(2, 1, 2, 1)); // b1 b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0
- v2[i] = temp2; // b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
- }
-
- SetNextPaState_simd16(pa, PaTriFan1_simd16, PaTriFan1, PaTriFanSingle0, 0, PA_STATE_OPT::SIMD_WIDTH); // same handlers again: the fan keeps using this function
- return true;
-
- // clang-format on
-}
-
-#endif
-void PaTriFanSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]) // Writes the three vertices of tri-fan triangle `primIndex` to verts[0..2] in horizontal form.
-{
-#if USE_SIMD16_FRONTEND
- const simd16vector& a = PaGetSimdVector_simd16(pa, pa.first, slot);
- const simd16vector& b = PaGetSimdVector_simd16(pa, pa.prev, slot);
- const simd16vector& c = PaGetSimdVector_simd16(pa, pa.cur, slot);
-
- if (pa.useAlternateOffset)
- {
- primIndex += KNOB_SIMD_WIDTH; // shifts the index into the upper half of the 16-triangle pattern below
- }
-
- // v0 -> a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0
- // v1 -> b1 b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0
- // v2 -> b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
-
- // vert 0 from leading vertex
- verts[0] = swizzleLane0(a);
-
- // vert 1
- if (primIndex < 15)
- {
- verts[1] = swizzleLaneN(b, primIndex + 1);
- }
- else
- {
- verts[1] = swizzleLane0(c); // lane primIndex + 1 would be past b, so take the first lane of c
- }
-
- // vert 2
- if (primIndex < 14)
- {
- verts[2] = swizzleLaneN(b, primIndex + 2);
- }
- else
- {
- verts[2] = swizzleLaneN(c, primIndex - 14); // lane 0 of c for primIndex 14, lane 1 for primIndex 15
- }
-#else
- const simdvector& a = PaGetSimdVector(pa, pa.first, slot);
- const simdvector& b = PaGetSimdVector(pa, pa.prev, slot);
- const simdvector& c = PaGetSimdVector(pa, pa.cur, slot);
-
- // vert 0 from leading vertex
- verts[0] = swizzleLane0(a);
-
- // vert 1
- if (primIndex < 7)
- {
- verts[1] = swizzleLaneN(b, primIndex + 1);
- }
- else
- {
- verts[1] = swizzleLane0(c); // lane primIndex + 1 would be past b, so take the first lane of c
- }
-
- // vert 2
- if (primIndex < 6)
- {
- verts[2] = swizzleLaneN(b, primIndex + 2);
- }
- else
- {
- verts[2] = swizzleLaneN(c, primIndex - 6); // lane 0 of c for primIndex 6, lane 1 for primIndex 7
- }
-#endif
-}
-
-bool PaQuadList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) // First quad-list step: produces no output, only updates the PA state.
-{
- SetNextPaState(pa, PaQuadList1, PaQuadListSingle0); // hands the follow-up handlers to the PA; slot and verts are not used here
- return false; // Not enough vertices to assemble 8 triangles.
-}
-
-bool PaQuadList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) // Splits 4 quads (16 vertices) into 8 triangles written to verts[0..2] and returns true.
-{
-#if USE_SIMD16_FRONTEND
- simdvector a;
- simdvector b;
-
- if (!pa.useAlternateOffset)
- {
- const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(a_16[i], 0); // both halves of vector 0 on the first pass
- b[i] = _simd16_extract_ps(a_16[i], 1);
- }
- }
- else
- {
- const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(b_16[i], 0); // both halves of vector 1 on the alternate pass
- b[i] = _simd16_extract_ps(b_16[i], 1);
- }
- }
-
-#else
- simdvector& a = PaGetSimdVector(pa, 0, slot);
- simdvector& b = PaGetSimdVector(pa, 1, slot);
-
-#endif
- simdscalar s1, s2;
-
- for (int i = 0; i < 4; ++i) // one iteration per component (4 per vertex)
- {
- simdscalar a0 = a[i];
- simdscalar b0 = b[i];
-
- s1 = _mm256_permute2f128_ps(a0, b0, 0x20); // a0 a1 a2 a3 b0 b1 b2 b3
- s2 = _mm256_permute2f128_ps(a0, b0, 0x31); // a4 a5 a6 a7 b4 b5 b6 b7
-
- simdvector& v0 = verts[0];
- v0[i] = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(0, 0, 0, 0)); // a0 a0 a4 a4 b0 b0 b4 b4
-
- simdvector& v1 = verts[1];
- v1[i] = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(2, 1, 2, 1)); // a1 a2 a5 a6 b1 b2 b5 b6
-
- simdvector& v2 = verts[2];
- v2[i] = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(3, 2, 3, 2)); // a2 a3 a6 a7 b2 b3 b6 b7
- }
-
- SetNextPaState(pa, PaQuadList0, PaQuadListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true); // hands the PaQuadList0 handlers back to the PA for the next batch
- return true;
-}
-
-#if ENABLE_AVX512_SIMD16
-bool PaQuadList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) // First simd16 quad-list step: produces no output, only updates the PA state.
-{
- SetNextPaState_simd16(pa, PaQuadList1_simd16, PaQuadList1, PaQuadListSingle0); // hands the follow-up handlers to the PA; slot and verts are not used here
- return false; // Not enough vertices to assemble 16 triangles.
-}
-
-bool PaQuadList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) // Splits 8 quads (vectors 0 and 1 of `slot`) into 16 triangles written to verts[0..2] and returns true.
-{
- // clang-format off
-
- const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
-
- // v0 -> a0 a0 a4 a4 a8 a8 aC aC b0 b0 b4 b4 b8 b8 bC bC
- // v1 -> a1 a2 a5 a6 a9 aA aD aE b1 b2 b5 b6 b9 bA bD bE
- // v2 -> a2 a3 a6 a7 aA aB aE aF b2 b3 b6 b7 bA bB bE bF
-
- simd16vector& v0 = verts[0];
- simd16vector& v1 = verts[1];
- simd16vector& v2 = verts[2];
-
- // for simd16 x, y, z, and w
- for (uint32_t i = 0; i < 4; i += 1)
- {
- simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
- simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
-
- simd16scalar temp0 = _simd16_permute2f128_ps(tempa, tempb, 0x88); // (2 0 2 0) = 10 00 10 00 // a0 a1 a2 a3 a8 a9 aA aB b0 b1 b2 b3 b8 b9 bA bB
- simd16scalar temp1 = _simd16_permute2f128_ps(tempa, tempb, 0xDD); // (3 1 3 1) = 11 01 11 01 // a4 a5 a6 a7 aC aD aE aF b4 b5 b6 b7 bC bD bE bF
-
- v0[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(0, 0, 0, 0)); // a0 a0 a4 a4 a8 a8 aC aC b0 b0 b4 b4 b8 b8 bC bC
- v1[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(2, 1, 2, 1)); // a1 a2 a5 a6 a9 aA aD aE b1 b2 b5 b6 b9 bA bD bE
- v2[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(3, 2, 3, 2)); // a2 a3 a6 a7 aA aB aE aF b2 b3 b6 b7 bA bB bE bF
- }
-
- SetNextPaState_simd16(pa, PaQuadList0_simd16, PaQuadList0, PaQuadListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true); // hands the PaQuadList0 handlers back to the PA for the next batch
- return true;
-
- // clang-format on
-}
-
-#endif
-void PaQuadListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]) // Writes the three vertices of quad-list triangle `primIndex` (two triangles per quad) to verts[0..2].
-{
-#if USE_SIMD16_FRONTEND
- const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
-
- if (pa.useAlternateOffset)
- {
- primIndex += KNOB_SIMD_WIDTH; // shifts the index into the upper cases of the switch below
- }
-
- switch (primIndex) // no default: an index outside 0..15 leaves verts untouched
- {
- case 0:
- // triangle 0 - 0 1 2
- verts[0] = swizzleLane0(a);
- verts[1] = swizzleLane1(a);
- verts[2] = swizzleLane2(a);
- break;
- case 1:
- // triangle 1 - 0 2 3
- verts[0] = swizzleLane0(a);
- verts[1] = swizzleLane2(a);
- verts[2] = swizzleLane3(a);
- break;
- case 2:
- // triangle 2 - 4 5 6
- verts[0] = swizzleLane4(a);
- verts[1] = swizzleLane5(a);
- verts[2] = swizzleLane6(a);
- break;
- case 3:
- // triangle 3 - 4 6 7
- verts[0] = swizzleLane4(a);
- verts[1] = swizzleLane6(a);
- verts[2] = swizzleLane7(a);
- break;
- case 4:
- // triangle 4 - 8 9 A
- verts[0] = swizzleLane8(a);
- verts[1] = swizzleLane9(a);
- verts[2] = swizzleLaneA(a);
- break;
- case 5:
- // triangle 5 - 8 A B
- verts[0] = swizzleLane8(a);
- verts[1] = swizzleLaneA(a);
- verts[2] = swizzleLaneB(a);
- break;
- case 6:
- // triangle 6 - C D E
- verts[0] = swizzleLaneC(a);
- verts[1] = swizzleLaneD(a);
- verts[2] = swizzleLaneE(a);
- break;
- case 7:
- // triangle 7 - C E F
- verts[0] = swizzleLaneC(a);
- verts[1] = swizzleLaneE(a);
- verts[2] = swizzleLaneF(a);
- break;
- case 8:
- // triangle 8 - lanes 0 1 2 of b
- verts[0] = swizzleLane0(b);
- verts[1] = swizzleLane1(b);
- verts[2] = swizzleLane2(b);
- break;
- case 9:
- // triangle 9 - lanes 0 2 3 of b
- verts[0] = swizzleLane0(b);
- verts[1] = swizzleLane2(b);
- verts[2] = swizzleLane3(b);
- break;
- case 10:
- // triangle 10 - lanes 4 5 6 of b
- verts[0] = swizzleLane4(b);
- verts[1] = swizzleLane5(b);
- verts[2] = swizzleLane6(b);
- break;
- case 11:
- // triangle 11 - lanes 4 6 7 of b
- verts[0] = swizzleLane4(b);
- verts[1] = swizzleLane6(b);
- verts[2] = swizzleLane7(b);
- break;
- case 12:
- // triangle 12 - lanes 8 9 A of b
- verts[0] = swizzleLane8(b);
- verts[1] = swizzleLane9(b);
- verts[2] = swizzleLaneA(b);
- break;
- case 13:
- // triangle 13 - lanes 8 A B of b
- verts[0] = swizzleLane8(b);
- verts[1] = swizzleLaneA(b);
- verts[2] = swizzleLaneB(b);
- break;
- case 14:
- // triangle 14 - lanes C D E of b
- verts[0] = swizzleLaneC(b);
- verts[1] = swizzleLaneD(b);
- verts[2] = swizzleLaneE(b);
- break;
- case 15:
- // triangle 15 - lanes C E F of b
- verts[0] = swizzleLaneC(b);
- verts[1] = swizzleLaneE(b);
- verts[2] = swizzleLaneF(b);
- break;
- }
-#else
- const simdvector& a = PaGetSimdVector(pa, 0, slot);
- const simdvector& b = PaGetSimdVector(pa, 1, slot);
-
- switch (primIndex) // no default: an index outside 0..7 leaves verts untouched
- {
- case 0:
- // triangle 0 - 0 1 2
- verts[0] = swizzleLane0(a);
- verts[1] = swizzleLane1(a);
- verts[2] = swizzleLane2(a);
- break;
- case 1:
- // triangle 1 - 0 2 3
- verts[0] = swizzleLane0(a);
- verts[1] = swizzleLane2(a);
- verts[2] = swizzleLane3(a);
- break;
- case 2:
- // triangle 2 - 4 5 6
- verts[0] = swizzleLane4(a);
- verts[1] = swizzleLane5(a);
- verts[2] = swizzleLane6(a);
- break;
- case 3:
- // triangle 3 - 4 6 7
- verts[0] = swizzleLane4(a);
- verts[1] = swizzleLane6(a);
- verts[2] = swizzleLane7(a);
- break;
- case 4:
- // triangle 4 - 8 9 10 (0 1 2)
- verts[0] = swizzleLane0(b);
- verts[1] = swizzleLane1(b);
- verts[2] = swizzleLane2(b);
- break;
- case 5:
- // triangle 5 - 8 10 11 (0 2 3)
- verts[0] = swizzleLane0(b);
- verts[1] = swizzleLane2(b);
- verts[2] = swizzleLane3(b);
- break;
- case 6:
- // triangle 6 - 12 13 14 (4 5 6)
- verts[0] = swizzleLane4(b);
- verts[1] = swizzleLane5(b);
- verts[2] = swizzleLane6(b);
- break;
- case 7:
- // triangle 7 - 12 14 15 (4 6 7)
- verts[0] = swizzleLane4(b);
- verts[1] = swizzleLane6(b);
- verts[2] = swizzleLane7(b);
- break;
- }
-#endif
-}
-
-bool PaLineLoop0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) // First line-loop step: produces no output, only updates the PA state.
-{
- SetNextPaState(pa, PaLineLoop1, PaLineLoopSingle0); // hands the follow-up handlers to the PA; slot and verts are not used here
- return false; // no lines are assembled in this state
-}
-
-bool PaLineLoop1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) // Assembles lines like a line strip, then patches the closing line so it ends at the loop's first vertex.
-{
- PaLineStrip1(pa, slot, verts); // fills verts as for a line strip (its return value is ignored)
-
- if (pa.numPrimsComplete + KNOB_SIMD_WIDTH > pa.numPrims - 1) // the last primitive (index numPrims - 1) falls inside this batch
- {
- // loop reconnect now
- const int lane = pa.numPrims - pa.numPrimsComplete - 1; // lane of the last primitive within this batch
-
-#if USE_SIMD16_FRONTEND
- simdvector first;
-
- const simd16vector& first_16 = PaGetSimdVector_simd16(pa, pa.first, slot);
-
- if (!pa.useAlternateOffset)
- {
- for (uint32_t i = 0; i < 4; i += 1)
- {
- first[i] = _simd16_extract_ps(first_16[i], 0);
- }
- }
- else
- {
- for (uint32_t i = 0; i < 4; i += 1)
- {
- first[i] = _simd16_extract_ps(first_16[i], 1); // NOTE(review): if index 1 selects the upper half, firstVtx[0] below is not lane 0 of pa.first, whereas PaLineLoopSingle0 always uses lane 0 -- confirm this is intended
- }
- }
-
-#else
- simdvector& first = PaGetSimdVector(pa, pa.first, slot);
-
-#endif
- for (int i = 0; i < 4; i++) // one iteration per component (4 per vertex)
- {
- float* firstVtx = (float*)&(first[i]);
- float* targetVtx = (float*)&(verts[1][i]);
- targetVtx[lane] = firstVtx[0]; // second endpoint of the closing line := element 0 of `first`
- }
- }
-
- SetNextPaState(pa, PaLineLoop1, PaLineLoopSingle0, 0, PA_STATE_OPT::SIMD_WIDTH); // same handlers again: the loop keeps using this function
- return true;
-}
-
-#if ENABLE_AVX512_SIMD16
-bool PaLineLoop0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) // First simd16 line-loop step: produces no output, only updates the PA state.
-{
- SetNextPaState_simd16(pa, PaLineLoop1_simd16, PaLineLoop1, PaLineLoopSingle0); // hands the follow-up handlers to the PA; slot and verts are not used here
- return false; // no lines are assembled in this state
-}
-
-bool PaLineLoop1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) // simd16 line loop: assembles lines like a line strip, then patches the closing line to end at the first vertex.
-{
- PaLineStrip1_simd16(pa, slot, verts); // fills verts as for a line strip (its return value is ignored)
-
- if (pa.numPrimsComplete + KNOB_SIMD16_WIDTH > pa.numPrims - 1) // the last primitive (index numPrims - 1) falls inside this batch
- {
- // loop reconnect now
- const int lane = pa.numPrims - pa.numPrimsComplete - 1; // lane of the last primitive within this batch
-
- const simd16vector& first = PaGetSimdVector_simd16(pa, pa.first, slot);
-
- for (int i = 0; i < 4; i++) // one iteration per component (4 per vertex)
- {
- float* firstVtx = (float*)&(first[i]); // C-style cast drops const; the pointer is only read from
- float* targetVtx = (float*)&(verts[1][i]);
- targetVtx[lane] = firstVtx[0]; // second endpoint of the closing line := lane 0 of the first vector
- }
- }
-
- SetNextPaState_simd16(
- pa, PaLineLoop1_simd16, PaLineLoop1, PaLineLoopSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
- return true;
-}
-
-#endif
-void PaLineLoopSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]) // Writes the two vertices of line-loop line `primIndex` to verts[0..1]; the final line ends at the first vertex.
-{
- PaLineStripSingle0(pa, slot, primIndex, verts); // start from the line-strip result
-
- if (pa.numPrimsComplete + primIndex == pa.numPrims - 1) // this is the last primitive, i.e. the closing line
- {
-#if USE_SIMD16_FRONTEND
- const simd16vector& first = PaGetSimdVector_simd16(pa, pa.first, slot);
-
- verts[1] = swizzleLane0(first);
-#else
- const simdvector& first = PaGetSimdVector(pa, pa.first, slot);
-
- verts[1] = swizzleLane0(first);
-#endif
- }
-}
-
-bool PaLineList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) // First line-list step: produces no output, only updates the PA state.
-{
- SetNextPaState(pa, PaLineList1, PaLineListSingle0); // hands the follow-up handlers to the PA; slot and verts are not used here
- return false; // Not enough vertices to assemble 8 lines
-}
-
-bool PaLineList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]) // Assembles 8 lines from 16 consecutive vertices: even vertices go to verts[0], odd vertices to verts[1].
-{
-#if USE_SIMD16_FRONTEND
- simdvector a;
- simdvector b;
-
- if (!pa.useAlternateOffset)
- {
- const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(a_16[i], 0); // both halves of vector 0 on the first pass
- b[i] = _simd16_extract_ps(a_16[i], 1);
- }
- }
- else
- {
- const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(b_16[i], 0); // both halves of vector 1 on the alternate pass
- b[i] = _simd16_extract_ps(b_16[i], 1);
- }
- }
-
-#else
- simdvector& a = PaGetSimdVector(pa, 0, slot);
- simdvector& b = PaGetSimdVector(pa, 1, slot);
-
-#endif
- /// @todo: verify provoking vertex is correct
- // Line list 0 1 2 3 4 5 6 7
- // 8 9 10 11 12 13 14 15
-
- // shuffle:
- // 0 2 4 6 8 10 12 14
- // 1 3 5 7 9 11 13 15
-
- for (uint32_t i = 0; i < 4; ++i) // one iteration per component (4 per vertex)
- {
- // 0 1 2 3 8 9 10 11
- __m256 vALowBLow = _mm256_permute2f128_ps(a.v[i], b.v[i], 0x20);
- // 4 5 6 7 12 13 14 15
- __m256 vAHighBHigh = _mm256_permute2f128_ps(a.v[i], b.v[i], 0x31);
-
- // 0 2 4 6 8 10 12 14
- verts[0].v[i] = _mm256_shuffle_ps(vALowBLow, vAHighBHigh, _MM_SHUFFLE(2, 0, 2, 0));
- // 1 3 5 7 9 11 13 15
- verts[1].v[i] = _mm256_shuffle_ps(vALowBLow, vAHighBHigh, _MM_SHUFFLE(3, 1, 3, 1));
- }
-
- SetNextPaState(pa, PaLineList0, PaLineListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true); // hands the PaLineList0 handlers back to the PA for the next batch
- return true;
-}
-
-#if ENABLE_AVX512_SIMD16
-bool PaLineList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) // First simd16 line-list step: produces no output, only updates the PA state.
-{
- SetNextPaState_simd16(pa, PaLineList1_simd16, PaLineList1, PaLineListSingle0); // hands the follow-up handlers to the PA; slot and verts are not used here
- return false; // Not enough vertices to assemble 16 lines
-}
-
-bool PaLineList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]) // Assembles 16 lines from vectors 0 and 1 of `slot`: even lanes go to verts[0], odd lanes to verts[1].
-{
- // clang-format off
-
- const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
-
- // v0 -> a0 a2 a4 a6 a8 aA aC aE b0 b2 b4 b6 b8 bA bC bE
- // v1 -> a1 a3 a5 a7 a9 aB aD aF b1 b3 b5 b7 b9 bB bD bF
-
- simd16vector& v0 = verts[0];
- simd16vector& v1 = verts[1];
-
- // for simd16 x, y, z, and w
- for (int i = 0; i < 4; i += 1)
- {
- simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
- simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
-
- simd16scalar temp0 = _simd16_permute2f128_ps(tempa, tempb, 0x88); // (2 0 2 0) 10 00 10 00 // a0 a1 a2 a3 a8 a9 aA aB b0 b1 b2 b3 b8 b9 bA bB
- simd16scalar temp1 = _simd16_permute2f128_ps(tempa, tempb, 0xDD); // (3 1 3 1) 11 01 11 01 // a4 a5 a6 a7 aC aD aE aF b4 b5 b6 b7 bC bD bE bF
-
- v0[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(2, 0, 2, 0)); // a0 a2 a4 a6 a8 aA aC aE b0 b2 b4 b6 b8 bA bC bE
- v1[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(3, 1, 3, 1)); // a1 a3 a5 a7 a9 aB aD aF b1 b3 b5 b7 b9 bB bD bF
- }
-
- SetNextPaState_simd16(pa, PaLineList0_simd16, PaLineList0, PaLineListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true); // hands the PaLineList0 handlers back to the PA for the next batch
- return true;
-
- // clang-format on
-}
-
-#endif
-void PaLineListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]) // Writes the two vertices of line-list line `primIndex` (lanes 2*k and 2*k+1) to verts[0..1].
-{
-#if USE_SIMD16_FRONTEND
- const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
-
- if (pa.useAlternateOffset)
- {
- primIndex += KNOB_SIMD_WIDTH; // shifts the index into the upper cases of the switch below
- }
-
- switch (primIndex) // cases 0..7 read from a, 8..15 from b; no default, so other indices leave verts untouched
- {
- case 0:
- verts[0] = swizzleLane0(a);
- verts[1] = swizzleLane1(a);
- break;
- case 1:
- verts[0] = swizzleLane2(a);
- verts[1] = swizzleLane3(a);
- break;
- case 2:
- verts[0] = swizzleLane4(a);
- verts[1] = swizzleLane5(a);
- break;
- case 3:
- verts[0] = swizzleLane6(a);
- verts[1] = swizzleLane7(a);
- break;
- case 4:
- verts[0] = swizzleLane8(a);
- verts[1] = swizzleLane9(a);
- break;
- case 5:
- verts[0] = swizzleLaneA(a);
- verts[1] = swizzleLaneB(a);
- break;
- case 6:
- verts[0] = swizzleLaneC(a);
- verts[1] = swizzleLaneD(a);
- break;
- case 7:
- verts[0] = swizzleLaneE(a);
- verts[1] = swizzleLaneF(a);
- break;
- case 8:
- verts[0] = swizzleLane0(b);
- verts[1] = swizzleLane1(b);
- break;
- case 9:
- verts[0] = swizzleLane2(b);
- verts[1] = swizzleLane3(b);
- break;
- case 10:
- verts[0] = swizzleLane4(b);
- verts[1] = swizzleLane5(b);
- break;
- case 11:
- verts[0] = swizzleLane6(b);
- verts[1] = swizzleLane7(b);
- break;
- case 12:
- verts[0] = swizzleLane8(b);
- verts[1] = swizzleLane9(b);
- break;
- case 13:
- verts[0] = swizzleLaneA(b);
- verts[1] = swizzleLaneB(b);
- break;
- case 14:
- verts[0] = swizzleLaneC(b);
- verts[1] = swizzleLaneD(b);
- break;
- case 15:
- verts[0] = swizzleLaneE(b);
- verts[1] = swizzleLaneF(b);
- break;
- }
-#else
- const simdvector& a = PaGetSimdVector(pa, 0, slot);
- const simdvector& b = PaGetSimdVector(pa, 1, slot);
-
- switch (primIndex) // cases 0..3 read from a, 4..7 from b; no default, so other indices leave verts untouched
- {
- case 0:
- verts[0] = swizzleLane0(a);
- verts[1] = swizzleLane1(a);
- break;
- case 1:
- verts[0] = swizzleLane2(a);
- verts[1] = swizzleLane3(a);
- break;
- case 2:
- verts[0] = swizzleLane4(a);
- verts[1] = swizzleLane5(a);
- break;
- case 3:
- verts[0] = swizzleLane6(a);
- verts[1] = swizzleLane7(a);
- break;
- case 4:
- verts[0] = swizzleLane0(b);
- verts[1] = swizzleLane1(b);
- break;
- case 5:
- verts[0] = swizzleLane2(b);
- verts[1] = swizzleLane3(b);
- break;
- case 6:
- verts[0] = swizzleLane4(b);
- verts[1] = swizzleLane5(b);
- break;
- case 7:
- verts[0] = swizzleLane6(b);
- verts[1] = swizzleLane7(b);
- break;
- }
-#endif
-}
-
-bool PaLineStrip0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
-{
- SetNextPaState(pa, PaLineStrip1, PaLineStripSingle0);
- return false; // Not enough vertices to assemble 8 lines
-}
-
-bool PaLineStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
-{
-#if USE_SIMD16_FRONTEND
- simdvector a;
- simdvector b;
-
- if (!pa.useAlternateOffset)
- {
- const simd16vector& a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(a_16[i], 0);
- b[i] = _simd16_extract_ps(a_16[i], 1);
- }
- }
- else
- {
- const simd16vector& b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(b_16[i], 0);
- b[i] = _simd16_extract_ps(b_16[i], 1);
- }
- }
-
-#else
- simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
- simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
-
-#endif
- /// @todo: verify provoking vertex is correct
- // Line list 0 1 2 3 4 5 6 7
- // 8 9 10 11 12 13 14 15
-
- // shuffle:
- // 0 1 2 3 4 5 6 7
- // 1 2 3 4 5 6 7 8
-
- verts[0] = a;
-
- for (uint32_t i = 0; i < 4; ++i)
- {
- // 1 2 3 x 5 6 7 x
- __m256 vPermA = _mm256_permute_ps(a.v[i], 0x39); // indices hi->low 00 11 10 01 (0 3 2 1)
- // 4 5 6 7 8 9 10 11
- __m256 vAHighBLow = _mm256_permute2f128_ps(a.v[i], b.v[i], 0x21);
-
- // x x x 4 x x x 8
- __m256 vPermB = _mm256_permute_ps(vAHighBLow, 0); // indices hi->low (0 0 0 0)
-
- verts[1].v[i] = _mm256_blend_ps(vPermA, vPermB, 0x88);
- }
-
- SetNextPaState(pa, PaLineStrip1, PaLineStripSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
- return true;
-}
-
-#if ENABLE_AVX512_SIMD16
-bool PaLineStrip0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
-{
- SetNextPaState_simd16(pa, PaLineStrip1_simd16, PaLineStrip1, PaLineStripSingle0);
- return false; // Not enough vertices to assemble 16 lines
-}
-
-bool PaLineStrip1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
-{
- // clang-format off
-
- const simd16scalari perm = _simd16_set_epi32(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
-
- const simd16vector& a = PaGetSimdVector_simd16(pa, pa.prev, slot);
- const simd16vector& b = PaGetSimdVector_simd16(pa, pa.cur, slot);
-
- const simd16mask mask0 = 0x0001;
-
- // v0 -> a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
- // v1 -> a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0
-
- simd16vector& v0 = verts[0];
- simd16vector& v1 = verts[1];
-
- v0 = a; // a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
-
- // for simd16 x, y, z, and w
- for (int i = 0; i < 4; i += 1)
- {
- simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
- simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
-
- simd16scalar temp = _simd16_blend_ps(tempa, tempb, mask0); // b0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
-
- v1[i] = _simd16_permute_ps(temp, perm); // a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0
- }
-
- SetNextPaState_simd16(pa, PaLineStrip1_simd16, PaLineStrip1, PaLineStripSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
- return true;
-
- // clang-format on
-}
-
-#endif
-void PaLineStripSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
-{
-#if USE_SIMD16_FRONTEND
- const simd16vector& a = PaGetSimdVector_simd16(pa, pa.prev, slot);
- const simd16vector& b = PaGetSimdVector_simd16(pa, pa.cur, slot);
-
- if (pa.useAlternateOffset)
- {
- primIndex += KNOB_SIMD_WIDTH;
- }
-
- switch (primIndex)
- {
- case 0:
- verts[0] = swizzleLane0(a);
- verts[1] = swizzleLane1(a);
- break;
- case 1:
- verts[0] = swizzleLane1(a);
- verts[1] = swizzleLane2(a);
- break;
- case 2:
- verts[0] = swizzleLane2(a);
- verts[1] = swizzleLane3(a);
- break;
- case 3:
- verts[0] = swizzleLane3(a);
- verts[1] = swizzleLane4(a);
- break;
- case 4:
- verts[0] = swizzleLane4(a);
- verts[1] = swizzleLane5(a);
- break;
- case 5:
- verts[0] = swizzleLane5(a);
- verts[1] = swizzleLane6(a);
- break;
- case 6:
- verts[0] = swizzleLane6(a);
- verts[1] = swizzleLane7(a);
- break;
- case 7:
- verts[0] = swizzleLane7(a);
- verts[1] = swizzleLane8(a);
- break;
- case 8:
- verts[0] = swizzleLane8(a);
- verts[1] = swizzleLane9(a);
- break;
- case 9:
- verts[0] = swizzleLane9(a);
- verts[1] = swizzleLaneA(a);
- break;
- case 10:
- verts[0] = swizzleLaneA(a);
- verts[1] = swizzleLaneB(a);
- break;
- case 11:
- verts[0] = swizzleLaneB(a);
- verts[1] = swizzleLaneC(a);
- break;
- case 12:
- verts[0] = swizzleLaneC(a);
- verts[1] = swizzleLaneD(a);
- break;
- case 13:
- verts[0] = swizzleLaneD(a);
- verts[1] = swizzleLaneE(a);
- break;
- case 14:
- verts[0] = swizzleLaneE(a);
- verts[1] = swizzleLaneF(a);
- break;
- case 15:
- verts[0] = swizzleLaneF(a);
- verts[1] = swizzleLane0(b);
- break;
- }
-#else
- const simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
- const simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
-
- switch (primIndex)
- {
- case 0:
- verts[0] = swizzleLane0(a);
- verts[1] = swizzleLane1(a);
- break;
- case 1:
- verts[0] = swizzleLane1(a);
- verts[1] = swizzleLane2(a);
- break;
- case 2:
- verts[0] = swizzleLane2(a);
- verts[1] = swizzleLane3(a);
- break;
- case 3:
- verts[0] = swizzleLane3(a);
- verts[1] = swizzleLane4(a);
- break;
- case 4:
- verts[0] = swizzleLane4(a);
- verts[1] = swizzleLane5(a);
- break;
- case 5:
- verts[0] = swizzleLane5(a);
- verts[1] = swizzleLane6(a);
- break;
- case 6:
- verts[0] = swizzleLane6(a);
- verts[1] = swizzleLane7(a);
- break;
- case 7:
- verts[0] = swizzleLane7(a);
- verts[1] = swizzleLane0(b);
- break;
- }
-#endif
-}
-
-bool PaPoints0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
-{
-#if USE_SIMD16_FRONTEND
- simdvector a;
-
- const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
-
- if (!pa.useAlternateOffset)
- {
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(a_16[i], 0);
- }
- }
- else
- {
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(a_16[i], 1);
- }
- }
-
-#else
- simdvector& a = PaGetSimdVector(pa, 0, slot);
-
-#endif
- verts[0] = a; // points only have 1 vertex.
-
- SetNextPaState(pa, PaPoints0, PaPointsSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
- return true;
-}
-
-#if ENABLE_AVX512_SIMD16
-bool PaPoints0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
-{
- simd16vector& a = PaGetSimdVector_simd16(pa, pa.cur, slot);
-
- verts[0] = a; // points only have 1 vertex.
-
- SetNextPaState_simd16(
- pa, PaPoints0_simd16, PaPoints0, PaPointsSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
- return true;
-}
-
-#endif
-void PaPointsSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
-{
-#if USE_SIMD16_FRONTEND
- const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
-
- if (pa.useAlternateOffset)
- {
- primIndex += KNOB_SIMD_WIDTH;
- }
-
- verts[0] = swizzleLaneN(a, primIndex);
-#else
- const simdvector& a = PaGetSimdVector(pa, 0, slot);
-
- verts[0] = swizzleLaneN(a, primIndex);
-#endif
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief State 1 for RECT_LIST topology.
-/// There is not enough to assemble 8 triangles.
-bool PaRectList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
-{
- SetNextPaState(pa, PaRectList1, PaRectListSingle0);
- return false;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief State 1 for RECT_LIST topology.
-/// Rect lists has the following format.
-/// w x y z
-/// v2 o---o v5 o---o v8 o---o v11 o---o
-/// | \ | | \ | | \ | | \ |
-/// v1 o---o v4 o---o v7 o---o v10 o---o
-/// v0 v3 v6 v9
-///
-/// Only 3 vertices of the rectangle are supplied. The 4th vertex is implied.
-///
-/// tri0 = { v0, v1, v2 } tri1 = { v0, v2, w } <-- w = v0 - v1 + v2
-/// tri2 = { v3, v4, v5 } tri3 = { v3, v5, x } <-- x = v3 - v4 + v5
-/// etc.
-///
-/// PA outputs 3 simdvectors for each of the triangle vertices v0, v1, v2
-/// where v0 contains all the first vertices for 8 triangles.
-///
-/// Result:
-/// verts[0] = { v0, v0, v3, v3, v6, v6, v9, v9 }
-/// verts[1] = { v1, v2, v4, v5, v7, v8, v10, v11 }
-/// verts[2] = { v2, w, v5, x, v8, y, v11, z }
-///
-/// @param pa - State for PA state machine.
-/// @param slot - Index into VS output which is either a position (slot 0) or attribute.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
-/// etc.
-bool PaRectList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
-{
-// SIMD vectors a and b are the last two vertical outputs from the vertex shader.
-#if USE_SIMD16_FRONTEND
- simdvector a;
- simdvector b;
-
- if (!pa.useAlternateOffset)
- {
- const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(a_16[i], 0);
- b[i] = _simd16_extract_ps(a_16[i], 1);
- }
- }
- else
- {
- const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(b_16[i], 0);
- b[i] = _simd16_extract_ps(b_16[i], 1);
- ;
- }
- }
-
-#else
- simdvector& a = PaGetSimdVector(pa, 0, slot); // a[] = { v0, v1, v2, v3, v4, v5, v6, v7 }
- simdvector& b = PaGetSimdVector(pa, 1, slot); // b[] = { v8, v9, v10, v11, v12, v13, v14, v15 }
-
-#endif
- __m256 tmp0, tmp1, tmp2;
-
- // Loop over each component in the simdvector.
- for (int i = 0; i < 4; ++i)
- {
- simdvector& v0 = verts[0]; // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
- tmp0 = _mm256_permute2f128_ps(
- b[i], b[i], 0x01); // tmp0 = { v12, v13, v14, v15, v8, v9, v10, v11 }
- v0[i] = _mm256_blend_ps(
- a[i],
- tmp0,
- 0x20); // v0 = { v0, *, *, v3, *, v9, v6, * } where * is don't care.
- tmp1 = _mm256_permute_ps(v0[i], 0xF0); // tmp1 = { v0, v0, v3, v3, *, *, *, * }
- v0[i] = _mm256_permute_ps(v0[i], 0x5A); // v0 = { *, *, *, *, v6, v6, v9, v9 }
- v0[i] =
- _mm256_blend_ps(tmp1, v0[i], 0xF0); // v0 = { v0, v0, v3, v3, v6, v6, v9, v9 }
-
- /// NOTE This is a bit expensive due to conflicts between vertices in 'a' and 'b'.
- /// AVX2 should make this much cheaper.
- simdvector& v1 = verts[1]; // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
- v1[i] = _mm256_permute_ps(a[i], 0x09); // v1 = { v1, v2, *, *, *, *, *, * }
- tmp1 = _mm256_permute_ps(a[i], 0x43); // tmp1 = { *, *, *, *, v7, *, v4, v5 }
- tmp2 = _mm256_blend_ps(v1[i], tmp1, 0xF0); // tmp2 = { v1, v2, *, *, v7, *, v4, v5 }
- tmp1 = _mm256_permute2f128_ps(tmp2, tmp2, 0x1); // tmp1 = { v7, *, v4, v5, *, *, *, * }
- v1[i] = _mm256_permute_ps(tmp0, 0xE0); // v1 = { *, *, *, *, *, v8, v10, v11 }
- v1[i] = _mm256_blend_ps(tmp2, v1[i], 0xE0); // v1 = { v1, v2, *, *, v7, v8, v10, v11 }
- v1[i] = _mm256_blend_ps(v1[i], tmp1, 0x0C); // v1 = { v1, v2, v4, v5, v7, v8, v10, v11 }
-
- // verts[2] = { v2, w, v5, x, v8, y, v11, z }
- simdvector& v2 = verts[2]; // verts[2] needs to be { v2, w, v5, x, v8, y, v11, z }
- v2[i] = _mm256_permute_ps(tmp0, 0x30); // v2 = { *, *, *, *, v8, *, v11, * }
- tmp1 = _mm256_permute_ps(tmp2, 0x31); // tmp1 = { v2, *, v5, *, *, *, *, * }
- v2[i] = _mm256_blend_ps(tmp1, v2[i], 0xF0);
-
- // Need to compute 4th implied vertex for the rectangle.
- tmp2 = _mm256_sub_ps(v0[i], v1[i]);
- tmp2 = _mm256_add_ps(tmp2, v2[i]); // tmp2 = { w, *, x, *, y, *, z, * }
- tmp2 = _mm256_permute_ps(tmp2, 0xA0); // tmp2 = { *, w, *, x, *, y, *, z }
- v2[i] = _mm256_blend_ps(v2[i], tmp2, 0xAA); // v2 = { v2, w, v5, x, v8, y, v11, z }
- }
-
- SetNextPaState(pa, PaRectList1, PaRectListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
- return true;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief State 2 for RECT_LIST topology.
-/// Not implemented unless there is a use case for more then 8 rects.
-/// @param pa - State for PA state machine.
-/// @param slot - Index into VS output which is either a position (slot 0) or attribute.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
-/// etc.
-bool PaRectList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
-{
- SWR_INVALID("Is rect list used for anything other then clears?");
- SetNextPaState(pa, PaRectList0, PaRectListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
- return true;
-}
-
-#if ENABLE_AVX512_SIMD16
-//////////////////////////////////////////////////////////////////////////
-/// @brief State 1 for RECT_LIST topology.
-/// There is not enough to assemble 8 triangles.
-bool PaRectList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
-{
- SetNextPaState_simd16(pa, PaRectList1_simd16, PaRectList1, PaRectListSingle0);
- return false;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief State 1 for RECT_LIST topology.
-/// Rect lists has the following format.
-/// w x y z
-/// v2 o---o v5 o---o v8 o---o v11 o---o
-/// | \ | | \ | | \ | | \ |
-/// v1 o---o v4 o---o v7 o---o v10 o---o
-/// v0 v3 v6 v9
-///
-/// Only 3 vertices of the rectangle are supplied. The 4th vertex is implied.
-///
-/// tri0 = { v0, v1, v2 } tri1 = { v0, v2, w } <-- w = v0 - v1 + v2
-/// tri2 = { v3, v4, v5 } tri3 = { v3, v5, x } <-- x = v3 - v4 + v5
-/// etc.
-///
-/// PA outputs 3 simdvectors for each of the triangle vertices v0, v1, v2
-/// where v0 contains all the first vertices for 8 triangles.
-///
-/// Result:
-/// verts[0] = { v0, v0, v3, v3, v6, v6, v9, v9 }
-/// verts[1] = { v1, v2, v4, v5, v7, v8, v10, v11 }
-/// verts[2] = { v2, w, v5, x, v8, y, v11, z }
-///
-/// @param pa - State for PA state machine.
-/// @param slot - Index into VS output which is either a position (slot 0) or attribute.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
-/// etc.
-bool PaRectList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
-{
- // clang-format off
-
- simdvector a;
- simdvector b;
-
- if (!pa.useAlternateOffset)
- {
- const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot); // a[] = { v0, v1, v2, v3, v4, v5, v6, v7,
- // v8, v9, v10, v11, v12, v13, v14, v15 }
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(a_16[i], 0);
- b[i] = _simd16_extract_ps(a_16[i], 1);
- }
- }
- else
- {
- const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot); // b[] = { v16...but not used by this implementation.. }
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(b_16[i], 0);
- b[i] = _simd16_extract_ps(b_16[i], 1);
- }
- }
-
- simd16vector& v0 = verts[0]; // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
- simd16vector& v1 = verts[1]; // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
- simd16vector& v2 = verts[2]; // verts[2] needs to be { v2, w, v5, x, v8, y, v11, z }
-
- // Loop over each component in the simdvector.
- for (int i = 0; i < 4; i += 1)
- {
- simdscalar v0_lo; // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
- simdscalar v1_lo; // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
- simdscalar v2_lo; // verts[2] needs to be { v2, w, v5, x, v8, y, v11, z }
-
- __m256 tmp0, tmp1, tmp2;
-
- tmp0 = _mm256_permute2f128_ps(b[i], b[i], 0x01); // tmp0 = { v12, v13, v14, v15, v8, v9, v10, v11 }
- v0_lo = _mm256_blend_ps(a[i], tmp0, 0x20); // v0 = { v0, *, *, v3, *, v9, v6, * } where * is don't care.
- tmp1 = _mm256_permute_ps(v0_lo, 0xF0); // tmp1 = { v0, v0, v3, v3, *, *, *, * }
- v0_lo = _mm256_permute_ps(v0_lo, 0x5A); // v0 = { *, *, *, *, v6, v6, v9, v9 }
- v0_lo = _mm256_blend_ps(tmp1, v0_lo, 0xF0); // v0 = { v0, v0, v3, v3, v6, v6, v9, v9 }
-
- /// NOTE This is a bit expensive due to conflicts between vertices in 'a' and 'b'.
- /// AVX2 should make this much cheaper.
- v1_lo = _mm256_permute_ps(a[i], 0x09); // v1 = { v1, v2, *, *, *, *, *, * }
- tmp1 = _mm256_permute_ps(a[i], 0x43); // tmp1 = { *, *, *, *, v7, *, v4, v5 }
- tmp2 = _mm256_blend_ps(v1_lo, tmp1, 0xF0); // tmp2 = { v1, v2, *, *, v7, *, v4, v5 }
- tmp1 = _mm256_permute2f128_ps(tmp2, tmp2, 0x1); // tmp1 = { v7, *, v4, v5, *, *, *, * }
- v1_lo = _mm256_permute_ps(tmp0, 0xE0); // v1 = { *, *, *, *, *, v8, v10, v11 }
- v1_lo = _mm256_blend_ps(tmp2, v1_lo, 0xE0); // v1 = { v1, v2, *, *, v7, v8, v10, v11 }
- v1_lo = _mm256_blend_ps(v1_lo, tmp1, 0x0C); // v1 = { v1, v2, v4, v5, v7, v8, v10, v11 }
-
- // verts[2] = { v2, w, v5, x, v8, y, v11, z }
- v2_lo = _mm256_permute_ps(tmp0, 0x30); // v2 = { *, *, *, *, v8, *, v11, * }
- tmp1 = _mm256_permute_ps(tmp2, 0x31); // tmp1 = { v2, *, v5, *, *, *, *, * }
- v2_lo = _mm256_blend_ps(tmp1, v2_lo, 0xF0);
-
- // Need to compute 4th implied vertex for the rectangle.
- tmp2 = _mm256_sub_ps(v0_lo, v1_lo);
- tmp2 = _mm256_add_ps(tmp2, v2_lo); // tmp2 = { w, *, x, *, y, *, z, * }
- tmp2 = _mm256_permute_ps(tmp2, 0xA0); // tmp2 = { *, w, *, x, *, y, *, z }
- v2_lo = _mm256_blend_ps(v2_lo, tmp2, 0xAA); // v2 = { v2, w, v5, x, v8, y, v11, z }
-
- v0[i] = _simd16_insert_ps(_simd16_setzero_ps(), v0_lo, 0);
- v1[i] = _simd16_insert_ps(_simd16_setzero_ps(), v1_lo, 0);
- v2[i] = _simd16_insert_ps(_simd16_setzero_ps(), v2_lo, 0);
- }
-
- SetNextPaState_simd16(pa, PaRectList1_simd16, PaRectList1, PaRectListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
- return true;
-
- // clang-format on
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief State 2 for RECT_LIST topology.
-/// Not implemented unless there is a use case for more then 8 rects.
-/// @param pa - State for PA state machine.
-/// @param slot - Index into VS output which is either a position (slot 0) or attribute.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
-/// etc.
-bool PaRectList2_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
-{
- SWR_INVALID("Is rect list used for anything other then clears?");
- SetNextPaState_simd16(
- pa, PaRectList0_simd16, PaRectList0, PaRectListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
- return true;
-}
-
-#endif
-//////////////////////////////////////////////////////////////////////////
-/// @brief This procedure is called by the Binner to assemble the attributes.
-/// Unlike position, which is stored vertically, the attributes are
-/// stored horizontally. The outputs from the VS, labeled as 'a' and
-/// 'b' are vertical. This function needs to transpose the lanes
-/// containing the vertical attribute data into horizontal form.
-/// @param pa - State for PA state machine.
-/// @param slot - Index into VS output for a given attribute.
-/// @param primIndex - Binner processes each triangle individually.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
-/// etc.
-void PaRectListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
-{
-// We have 12 simdscalars contained within 3 simdvectors which
-// hold at least 8 triangles worth of data. We want to assemble a single
-// triangle with data in horizontal form.
-#if USE_SIMD16_FRONTEND
- simdvector a;
- simdvector b;
-
- if (!pa.useAlternateOffset)
- {
- const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(a_16[i], 0);
- b[i] = _simd16_extract_ps(a_16[i], 1);
- }
- }
- else
- {
- const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
-
- for (uint32_t i = 0; i < 4; i += 1)
- {
- a[i] = _simd16_extract_ps(b_16[i], 0);
- b[i] = _simd16_extract_ps(b_16[i], 1);
- ;
- }
- }
-
-#else
- simdvector& a = PaGetSimdVector(pa, 0, slot);
-
-#endif
- // Convert from vertical to horizontal.
- switch (primIndex)
- {
- case 0:
- verts[0] = swizzleLane0(a);
- verts[1] = swizzleLane1(a);
- verts[2] = swizzleLane2(a);
- break;
- case 1:
- verts[0] = swizzleLane0(a);
- verts[1] = swizzleLane2(a);
- verts[2] = _mm_blend_ps(verts[0], verts[1], 0xA);
- break;
- case 2:
- case 3:
- case 4:
- case 5:
- case 6:
- case 7:
- SWR_INVALID("Invalid primIndex: %d", primIndex);
- break;
- };
-}
-
-PA_STATE_OPT::PA_STATE_OPT(DRAW_CONTEXT* in_pDC,
- uint32_t in_numPrims,
- uint8_t* pStream,
- uint32_t in_streamSizeInVerts,
- uint32_t in_vertexStride,
- bool in_isStreaming,
- uint32_t numVertsPerPrim,
- PRIMITIVE_TOPOLOGY topo) :
- PA_STATE(in_pDC, pStream, in_streamSizeInVerts, in_vertexStride, numVertsPerPrim),
- numPrims(in_numPrims), numPrimsComplete(0), numSimdPrims(0), cur(0), prev(0), first(0),
- counter(0), reset(false), pfnPaFunc(nullptr), isStreaming(in_isStreaming)
-{
- const API_STATE& state = GetApiState(pDC);
-
- this->binTopology = topo == TOP_UNKNOWN ? state.topology : topo;
-
-#if ENABLE_AVX512_SIMD16
- pfnPaFunc_simd16 = nullptr;
-
-#endif
- switch (this->binTopology)
- {
- case TOP_TRIANGLE_LIST:
- this->pfnPaFunc = PaTriList0;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaTriList0_simd16;
-#endif
- break;
- case TOP_TRIANGLE_STRIP:
- this->pfnPaFunc = PaTriStrip0;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaTriStrip0_simd16;
-#endif
- break;
- case TOP_TRIANGLE_FAN:
- this->pfnPaFunc = PaTriFan0;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaTriFan0_simd16;
-#endif
- break;
- case TOP_QUAD_LIST:
- this->pfnPaFunc = PaQuadList0;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaQuadList0_simd16;
-#endif
- this->numPrims = in_numPrims * 2; // Convert quad primitives into triangles
- break;
- case TOP_QUAD_STRIP:
- // quad strip pattern when decomposed into triangles is the same as verts strips
- this->pfnPaFunc = PaTriStrip0;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaTriStrip0_simd16;
-#endif
- this->numPrims = in_numPrims * 2; // Convert quad primitives into triangles
- break;
- case TOP_LINE_LIST:
- this->pfnPaFunc = PaLineList0;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaLineList0_simd16;
-#endif
- this->numPrims = in_numPrims;
- break;
- case TOP_LINE_STRIP:
- this->pfnPaFunc = PaLineStrip0;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaLineStrip0_simd16;
-#endif
- this->numPrims = in_numPrims;
- break;
- case TOP_LINE_LOOP:
- this->pfnPaFunc = PaLineLoop0;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaLineLoop0_simd16;
-#endif
- this->numPrims = in_numPrims;
- break;
- case TOP_POINT_LIST:
- this->pfnPaFunc = PaPoints0;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPoints0_simd16;
-#endif
- this->numPrims = in_numPrims;
- break;
- case TOP_RECT_LIST:
- this->pfnPaFunc = PaRectList0;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaRectList0_simd16;
-#endif
- this->numPrims = in_numPrims * 2;
- break;
-
- case TOP_PATCHLIST_1:
- this->pfnPaFunc = PaPatchList<1>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<1>;
-#endif
- break;
- case TOP_PATCHLIST_2:
- this->pfnPaFunc = PaPatchList<2>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<2>;
-#endif
- break;
- case TOP_PATCHLIST_3:
- this->pfnPaFunc = PaPatchList<3>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<3>;
-#endif
- break;
- case TOP_PATCHLIST_4:
- this->pfnPaFunc = PaPatchList<4>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<4>;
-#endif
- break;
- case TOP_PATCHLIST_5:
- this->pfnPaFunc = PaPatchList<5>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<5>;
-#endif
- break;
- case TOP_PATCHLIST_6:
- this->pfnPaFunc = PaPatchList<6>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<6>;
-#endif
- break;
- case TOP_PATCHLIST_7:
- this->pfnPaFunc = PaPatchList<7>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<7>;
-#endif
- break;
- case TOP_PATCHLIST_8:
- this->pfnPaFunc = PaPatchList<8>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<8>;
-#endif
- break;
- case TOP_PATCHLIST_9:
- this->pfnPaFunc = PaPatchList<9>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<9>;
-#endif
- break;
- case TOP_PATCHLIST_10:
- this->pfnPaFunc = PaPatchList<10>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<10>;
-#endif
- break;
- case TOP_PATCHLIST_11:
- this->pfnPaFunc = PaPatchList<11>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<11>;
-#endif
- break;
- case TOP_PATCHLIST_12:
- this->pfnPaFunc = PaPatchList<12>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<12>;
-#endif
- break;
- case TOP_PATCHLIST_13:
- this->pfnPaFunc = PaPatchList<13>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<13>;
-#endif
- break;
- case TOP_PATCHLIST_14:
- this->pfnPaFunc = PaPatchList<14>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<14>;
-#endif
- break;
- case TOP_PATCHLIST_15:
- this->pfnPaFunc = PaPatchList<15>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<15>;
-#endif
- break;
- case TOP_PATCHLIST_16:
- this->pfnPaFunc = PaPatchList<16>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<16>;
-#endif
- break;
- case TOP_PATCHLIST_17:
- this->pfnPaFunc = PaPatchList<17>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<17>;
-#endif
- break;
- case TOP_PATCHLIST_18:
- this->pfnPaFunc = PaPatchList<18>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<18>;
-#endif
- break;
- case TOP_PATCHLIST_19:
- this->pfnPaFunc = PaPatchList<19>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<19>;
-#endif
- break;
- case TOP_PATCHLIST_20:
- this->pfnPaFunc = PaPatchList<20>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<20>;
-#endif
- break;
- case TOP_PATCHLIST_21:
- this->pfnPaFunc = PaPatchList<21>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<21>;
-#endif
- break;
- case TOP_PATCHLIST_22:
- this->pfnPaFunc = PaPatchList<22>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<22>;
-#endif
- break;
- case TOP_PATCHLIST_23:
- this->pfnPaFunc = PaPatchList<23>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<23>;
-#endif
- break;
- case TOP_PATCHLIST_24:
- this->pfnPaFunc = PaPatchList<24>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<24>;
-#endif
- break;
- case TOP_PATCHLIST_25:
- this->pfnPaFunc = PaPatchList<25>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<25>;
-#endif
- break;
- case TOP_PATCHLIST_26:
- this->pfnPaFunc = PaPatchList<26>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<26>;
-#endif
- break;
- case TOP_PATCHLIST_27:
- this->pfnPaFunc = PaPatchList<27>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<27>;
-#endif
- break;
- case TOP_PATCHLIST_28:
- this->pfnPaFunc = PaPatchList<28>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<28>;
-#endif
- break;
- case TOP_PATCHLIST_29:
- this->pfnPaFunc = PaPatchList<29>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<29>;
-#endif
- break;
- case TOP_PATCHLIST_30:
- this->pfnPaFunc = PaPatchList<30>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<30>;
-#endif
- break;
- case TOP_PATCHLIST_31:
- this->pfnPaFunc = PaPatchList<31>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<31>;
-#endif
- break;
- case TOP_PATCHLIST_32:
- this->pfnPaFunc = PaPatchList<32>;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<32>;
-#endif
- break;
-
- default:
- SWR_INVALID("Invalid topology: %d", this->binTopology);
- break;
- };
-
- this->pfnPaFuncReset = this->pfnPaFunc;
-#if ENABLE_AVX512_SIMD16
- this->pfnPaFuncReset_simd16 = this->pfnPaFunc_simd16;
-#endif
-
-#if USE_SIMD16_FRONTEND
- simd16scalari id16 = _simd16_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
- simd16scalari id82 = _simd16_set_epi32(7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0);
-
-#else
- simdscalari id8 = _simd_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
- simdscalari id4 = _simd_set_epi32(3, 3, 2, 2, 1, 1, 0, 0);
-
-#endif
- switch (this->binTopology)
- {
- case TOP_TRIANGLE_LIST:
- case TOP_TRIANGLE_STRIP:
- case TOP_TRIANGLE_FAN:
- case TOP_LINE_STRIP:
- case TOP_LINE_LIST:
- case TOP_LINE_LOOP:
-#if USE_SIMD16_FRONTEND
- this->primIDIncr = 16;
- this->primID = id16;
-#else
- this->primIDIncr = 8;
- this->primID = id8;
-#endif
- break;
- case TOP_QUAD_LIST:
- case TOP_QUAD_STRIP:
- case TOP_RECT_LIST:
-#if USE_SIMD16_FRONTEND
- this->primIDIncr = 8;
- this->primID = id82;
-#else
- this->primIDIncr = 4;
- this->primID = id4;
-#endif
- break;
- case TOP_POINT_LIST:
-#if USE_SIMD16_FRONTEND
- this->primIDIncr = 16;
- this->primID = id16;
-#else
- this->primIDIncr = 8;
- this->primID = id8;
-#endif
- break;
- case TOP_PATCHLIST_1:
- case TOP_PATCHLIST_2:
- case TOP_PATCHLIST_3:
- case TOP_PATCHLIST_4:
- case TOP_PATCHLIST_5:
- case TOP_PATCHLIST_6:
- case TOP_PATCHLIST_7:
- case TOP_PATCHLIST_8:
- case TOP_PATCHLIST_9:
- case TOP_PATCHLIST_10:
- case TOP_PATCHLIST_11:
- case TOP_PATCHLIST_12:
- case TOP_PATCHLIST_13:
- case TOP_PATCHLIST_14:
- case TOP_PATCHLIST_15:
- case TOP_PATCHLIST_16:
- case TOP_PATCHLIST_17:
- case TOP_PATCHLIST_18:
- case TOP_PATCHLIST_19:
- case TOP_PATCHLIST_20:
- case TOP_PATCHLIST_21:
- case TOP_PATCHLIST_22:
- case TOP_PATCHLIST_23:
- case TOP_PATCHLIST_24:
- case TOP_PATCHLIST_25:
- case TOP_PATCHLIST_26:
- case TOP_PATCHLIST_27:
- case TOP_PATCHLIST_28:
- case TOP_PATCHLIST_29:
- case TOP_PATCHLIST_30:
- case TOP_PATCHLIST_31:
- case TOP_PATCHLIST_32:
- // Always run KNOB_SIMD_WIDTH number of patches at a time.
-#if USE_SIMD16_FRONTEND
- this->primIDIncr = 16;
- this->primID = id16;
-#else
- this->primIDIncr = 8;
- this->primID = id8;
-#endif
- break;
-
- default:
- SWR_INVALID("Invalid topology: %d", this->binTopology);
- break;
- };
-}
-#endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
deleted file mode 100644
index c14cd56e52e..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
+++ /dev/null
@@ -1,473 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file rasterizer.cpp
- *
- * @brief Implementation for the rasterizer.
- *
- ******************************************************************************/
-
-#include <vector>
-#include <algorithm>
-
-#include "rasterizer.h"
-#include "backends/gen_rasterizer.hpp"
-#include "rdtsc_core.h"
-#include "backend.h"
-#include "utils.h"
-#include "frontend.h"
-#include "tilemgr.h"
-#include "memory/tilingtraits.h"
-#include "rasterizer_impl.h"
-
-PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT]
- [STATE_VALID_TRI_EDGE_COUNT][2];
-
-void RasterizeLine(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
-{
- const TRIANGLE_WORK_DESC& workDesc = *((TRIANGLE_WORK_DESC*)pData);
-#if KNOB_ENABLE_TOSS_POINTS
- if (KNOB_TOSS_BIN_TRIS)
- {
- return;
- }
-#endif
-
- // bloat line to two tris and call the triangle rasterizer twice
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizeLine, pDC->drawId);
-
- const API_STATE& state = GetApiState(pDC);
- const SWR_RASTSTATE& rastState = state.rastState;
-
- // macrotile dimensioning
- uint32_t macroX, macroY;
- MacroTileMgr::getTileIndices(macroTile, macroX, macroY);
- int32_t macroBoxLeft = macroX * KNOB_MACROTILE_X_DIM_FIXED;
- int32_t macroBoxRight = macroBoxLeft + KNOB_MACROTILE_X_DIM_FIXED - 1;
- int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
- int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
-
- const SWR_RECT& scissorInFixedPoint =
- state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
-
- // create a copy of the triangle buffer to write our adjusted vertices to
- OSALIGNSIMD(float) newTriBuffer[4 * 4];
- TRIANGLE_WORK_DESC newWorkDesc = workDesc;
- newWorkDesc.pTriBuffer = &newTriBuffer[0];
-
- // create a copy of the attrib buffer to write our adjusted attribs to
- OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS];
- newWorkDesc.pAttribs = &newAttribBuffer[0];
-
- const __m128 vBloat0 = _mm_set_ps(0.5f, -0.5f, -0.5f, 0.5f);
- const __m128 vBloat1 = _mm_set_ps(0.5f, 0.5f, 0.5f, -0.5f);
-
- __m128 vX, vY, vZ, vRecipW;
-
- vX = _mm_load_ps(workDesc.pTriBuffer);
- vY = _mm_load_ps(workDesc.pTriBuffer + 4);
- vZ = _mm_load_ps(workDesc.pTriBuffer + 8);
- vRecipW = _mm_load_ps(workDesc.pTriBuffer + 12);
-
- // triangle 0
- // v0,v1 -> v0,v0,v1
- __m128 vXa = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 1, 0, 0));
- __m128 vYa = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 1, 0, 0));
- __m128 vZa = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 1, 0, 0));
- __m128 vRecipWa = _mm_shuffle_ps(vRecipW, vRecipW, _MM_SHUFFLE(1, 1, 0, 0));
-
- __m128 vLineWidth = _mm_set1_ps(pDC->pState->state.rastState.lineWidth);
- __m128 vAdjust = _mm_mul_ps(vLineWidth, vBloat0);
- if (workDesc.triFlags.yMajor)
- {
- vXa = _mm_add_ps(vAdjust, vXa);
- }
- else
- {
- vYa = _mm_add_ps(vAdjust, vYa);
- }
-
- // Store triangle description for rasterizer
- _mm_store_ps((float*)&newTriBuffer[0], vXa);
- _mm_store_ps((float*)&newTriBuffer[4], vYa);
- _mm_store_ps((float*)&newTriBuffer[8], vZa);
- _mm_store_ps((float*)&newTriBuffer[12], vRecipWa);
-
- // binner bins 3 edges for lines as v0, v1, v1
- // tri0 needs v0, v0, v1
- for (uint32_t a = 0; a < workDesc.numAttribs; ++a)
- {
- __m128 vAttrib0 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 0]);
- __m128 vAttrib1 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 4]);
-
- _mm_store_ps((float*)&newAttribBuffer[a * 12 + 0], vAttrib0);
- _mm_store_ps((float*)&newAttribBuffer[a * 12 + 4], vAttrib0);
- _mm_store_ps((float*)&newAttribBuffer[a * 12 + 8], vAttrib1);
- }
-
- // Store user clip distances for triangle 0
- float newClipBuffer[3 * 8];
- uint32_t numClipDist = _mm_popcnt_u32(state.backendState.clipDistanceMask);
- if (numClipDist)
- {
- newWorkDesc.pUserClipBuffer = newClipBuffer;
-
- float* pOldBuffer = workDesc.pUserClipBuffer;
- float* pNewBuffer = newClipBuffer;
- for (uint32_t i = 0; i < numClipDist; ++i)
- {
- // read barycentric coeffs from binner
- float a = *(pOldBuffer++);
- float b = *(pOldBuffer++);
-
- // reconstruct original clip distance at vertices
- float c0 = a + b;
- float c1 = b;
-
- // construct triangle barycentrics
- *(pNewBuffer++) = c0 - c1;
- *(pNewBuffer++) = c0 - c1;
- *(pNewBuffer++) = c1;
- }
- }
-
- // setup triangle rasterizer function
- PFN_WORK_FUNC pfnTriRast;
- // conservative rast not supported for points/lines
- pfnTriRast = GetRasterizerFunc(rastState.sampleCount,
- rastState.bIsCenterPattern,
- false,
- SWR_INPUT_COVERAGE_NONE,
- EdgeValToEdgeState(ALL_EDGES_VALID),
- (pDC->pState->state.scissorsTileAligned == false));
-
- // make sure this macrotile intersects the triangle
- __m128i vXai = fpToFixedPoint(vXa);
- __m128i vYai = fpToFixedPoint(vYa);
- OSALIGNSIMD(SWR_RECT) bboxA;
- calcBoundingBoxInt(vXai, vYai, bboxA);
-
- if (!(bboxA.xmin > macroBoxRight || bboxA.xmin > scissorInFixedPoint.xmax ||
- bboxA.xmax - 1 < macroBoxLeft || bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
- bboxA.ymin > macroBoxBottom || bboxA.ymin > scissorInFixedPoint.ymax ||
- bboxA.ymax - 1 < macroBoxTop || bboxA.ymax - 1 < scissorInFixedPoint.ymin))
- {
- // rasterize triangle
- pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
- }
-
- // triangle 1
- // v0,v1 -> v1,v1,v0
- vXa = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 0, 1, 1));
- vYa = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 0, 1, 1));
- vZa = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 0, 1, 1));
- vRecipWa = _mm_shuffle_ps(vRecipW, vRecipW, _MM_SHUFFLE(1, 0, 1, 1));
-
- vAdjust = _mm_mul_ps(vLineWidth, vBloat1);
- if (workDesc.triFlags.yMajor)
- {
- vXa = _mm_add_ps(vAdjust, vXa);
- }
- else
- {
- vYa = _mm_add_ps(vAdjust, vYa);
- }
-
- // Store triangle description for rasterizer
- _mm_store_ps((float*)&newTriBuffer[0], vXa);
- _mm_store_ps((float*)&newTriBuffer[4], vYa);
- _mm_store_ps((float*)&newTriBuffer[8], vZa);
- _mm_store_ps((float*)&newTriBuffer[12], vRecipWa);
-
- // binner bins 3 edges for lines as v0, v1, v1
- // tri1 needs v1, v1, v0
- for (uint32_t a = 0; a < workDesc.numAttribs; ++a)
- {
- __m128 vAttrib0 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 0]);
- __m128 vAttrib1 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 4]);
-
- _mm_store_ps((float*)&newAttribBuffer[a * 12 + 0], vAttrib1);
- _mm_store_ps((float*)&newAttribBuffer[a * 12 + 4], vAttrib1);
- _mm_store_ps((float*)&newAttribBuffer[a * 12 + 8], vAttrib0);
- }
-
- // store user clip distance for triangle 1
- if (numClipDist)
- {
- float* pOldBuffer = workDesc.pUserClipBuffer;
- float* pNewBuffer = newClipBuffer;
- for (uint32_t i = 0; i < numClipDist; ++i)
- {
- // read barycentric coeffs from binner
- float a = *(pOldBuffer++);
- float b = *(pOldBuffer++);
-
- // reconstruct original clip distance at vertices
- float c0 = a + b;
- float c1 = b;
-
- // construct triangle barycentrics
- *(pNewBuffer++) = c1 - c0;
- *(pNewBuffer++) = c1 - c0;
- *(pNewBuffer++) = c0;
- }
- }
-
- vXai = fpToFixedPoint(vXa);
- vYai = fpToFixedPoint(vYa);
- calcBoundingBoxInt(vXai, vYai, bboxA);
-
- if (!(bboxA.xmin > macroBoxRight || bboxA.xmin > scissorInFixedPoint.xmax ||
- bboxA.xmax - 1 < macroBoxLeft || bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
- bboxA.ymin > macroBoxBottom || bboxA.ymin > scissorInFixedPoint.ymax ||
- bboxA.ymax - 1 < macroBoxTop || bboxA.ymax - 1 < scissorInFixedPoint.ymin))
- {
- // rasterize triangle
- pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
- }
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizeLine, 1);
-}
-
-void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
-{
-#if KNOB_ENABLE_TOSS_POINTS
- if (KNOB_TOSS_BIN_TRIS)
- {
- return;
- }
-#endif
-
- const TRIANGLE_WORK_DESC& workDesc = *(const TRIANGLE_WORK_DESC*)pData;
- const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
-
- // map x,y relative offsets from start of raster tile to bit position in
- // coverage mask for the point
- static const uint32_t coverageMap[8][8] = {{0, 1, 4, 5, 8, 9, 12, 13},
- {2, 3, 6, 7, 10, 11, 14, 15},
- {16, 17, 20, 21, 24, 25, 28, 29},
- {18, 19, 22, 23, 26, 27, 30, 31},
- {32, 33, 36, 37, 40, 41, 44, 45},
- {34, 35, 38, 39, 42, 43, 46, 47},
- {48, 49, 52, 53, 56, 57, 60, 61},
- {50, 51, 54, 55, 58, 59, 62, 63}};
-
- OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc = {};
-
- // pull point information from triangle buffer
- // @todo use structs for readability
- uint32_t tileAlignedX = *(uint32_t*)workDesc.pTriBuffer;
- uint32_t tileAlignedY = *(uint32_t*)(workDesc.pTriBuffer + 1);
- float z = *(workDesc.pTriBuffer + 2);
-
- // construct triangle descriptor for point
- // no interpolation, set up i,j for constant interpolation of z and attribs
- // @todo implement an optimized backend that doesn't require triangle information
-
- // compute coverage mask from x,y packed into the coverageMask flag
- // mask indices by the maximum valid index for x/y of coveragemap.
- uint32_t tX = workDesc.triFlags.coverageMask & 0x7;
- uint32_t tY = (workDesc.triFlags.coverageMask >> 4) & 0x7;
- for (uint32_t i = 0; i < _countof(triDesc.coverageMask); ++i)
- {
- triDesc.coverageMask[i] = 1ULL << coverageMap[tY][tX];
- }
- triDesc.anyCoveredSamples = triDesc.coverageMask[0];
- triDesc.innerCoverageMask = triDesc.coverageMask[0];
-
- // no persp divide needed for points
- triDesc.pAttribs = triDesc.pPerspAttribs = workDesc.pAttribs;
- triDesc.triFlags = workDesc.triFlags;
- triDesc.recipDet = 1.0f;
- triDesc.OneOverW[0] = triDesc.OneOverW[1] = triDesc.OneOverW[2] = 1.0f;
- triDesc.I[0] = triDesc.I[1] = triDesc.I[2] = 0.0f;
- triDesc.J[0] = triDesc.J[1] = triDesc.J[2] = 0.0f;
- triDesc.Z[0] = triDesc.Z[1] = triDesc.Z[2] = z;
-
- RenderOutputBuffers renderBuffers;
- GetRenderHotTiles(pDC,
- workerId,
- macroTile,
- tileAlignedX >> KNOB_TILE_X_DIM_SHIFT,
- tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
- renderBuffers,
- triDesc.triFlags.renderTargetArrayIndex);
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelBackend, pDC->drawId);
- backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers);
- RDTSC_END(pDC->pContext->pBucketMgr, BEPixelBackend, 0);
-}
-
-void RasterizeTriPoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
-{
- const TRIANGLE_WORK_DESC& workDesc = *(const TRIANGLE_WORK_DESC*)pData;
- const SWR_RASTSTATE& rastState = pDC->pState->state.rastState;
- const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
-
- bool isPointSpriteTexCoordEnabled = backendState.pointSpriteTexCoordMask != 0;
-
- // load point vertex
- float x = *workDesc.pTriBuffer;
- float y = *(workDesc.pTriBuffer + 1);
- float z = *(workDesc.pTriBuffer + 2);
-
- // create a copy of the triangle buffer to write our adjusted vertices to
- OSALIGNSIMD(float) newTriBuffer[4 * 4];
- TRIANGLE_WORK_DESC newWorkDesc = workDesc;
- newWorkDesc.pTriBuffer = &newTriBuffer[0];
-
- // create a copy of the attrib buffer to write our adjusted attribs to
- OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS];
- newWorkDesc.pAttribs = &newAttribBuffer[0];
-
- newWorkDesc.pUserClipBuffer = workDesc.pUserClipBuffer;
- newWorkDesc.numAttribs = workDesc.numAttribs;
- newWorkDesc.triFlags = workDesc.triFlags;
-
- // construct two tris by bloating point by point size
- float halfPointSize = workDesc.triFlags.pointSize * 0.5f;
- float lowerX = x - halfPointSize;
- float upperX = x + halfPointSize;
- float lowerY = y - halfPointSize;
- float upperY = y + halfPointSize;
-
- // tri 0
- float* pBuf = &newTriBuffer[0];
- *pBuf++ = lowerX;
- *pBuf++ = lowerX;
- *pBuf++ = upperX;
- pBuf++;
- *pBuf++ = lowerY;
- *pBuf++ = upperY;
- *pBuf++ = upperY;
- pBuf++;
- _mm_store_ps(pBuf, _mm_set1_ps(z));
- _mm_store_ps(pBuf += 4, _mm_set1_ps(1.0f));
-
- // setup triangle rasterizer function
- PFN_WORK_FUNC pfnTriRast;
- // conservative rast not supported for points/lines
- pfnTriRast = GetRasterizerFunc(rastState.sampleCount,
- rastState.bIsCenterPattern,
- false,
- SWR_INPUT_COVERAGE_NONE,
- EdgeValToEdgeState(ALL_EDGES_VALID),
- (pDC->pState->state.scissorsTileAligned == false));
-
- // overwrite texcoords for point sprites
- if (isPointSpriteTexCoordEnabled)
- {
- // copy original attribs
- memcpy(&newAttribBuffer[0], workDesc.pAttribs, 4 * 3 * workDesc.numAttribs * sizeof(float));
- newWorkDesc.pAttribs = &newAttribBuffer[0];
-
- // overwrite texcoord for point sprites
- uint32_t texCoordMask = backendState.pointSpriteTexCoordMask;
- unsigned long texCoordAttrib = 0;
-
- while (_BitScanForward(&texCoordAttrib, texCoordMask))
- {
- texCoordMask &= ~(1 << texCoordAttrib);
- __m128* pTexAttrib = (__m128*)&newAttribBuffer[0] + 3 * texCoordAttrib;
- if (rastState.pointSpriteTopOrigin)
- {
- pTexAttrib[0] = _mm_set_ps(1, 0, 0, 0);
- pTexAttrib[1] = _mm_set_ps(1, 0, 1, 0);
- pTexAttrib[2] = _mm_set_ps(1, 0, 1, 1);
- }
- else
- {
- pTexAttrib[0] = _mm_set_ps(1, 0, 1, 0);
- pTexAttrib[1] = _mm_set_ps(1, 0, 0, 0);
- pTexAttrib[2] = _mm_set_ps(1, 0, 0, 1);
- }
- }
- }
- else
- {
- // no texcoord overwrite, can reuse the attrib buffer from frontend
- newWorkDesc.pAttribs = workDesc.pAttribs;
- }
-
- pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
-
- // tri 1
- pBuf = &newTriBuffer[0];
- *pBuf++ = lowerX;
- *pBuf++ = upperX;
- *pBuf++ = upperX;
- pBuf++;
- *pBuf++ = lowerY;
- *pBuf++ = upperY;
- *pBuf++ = lowerY;
- // z, w unchanged
-
- if (isPointSpriteTexCoordEnabled)
- {
- uint32_t texCoordMask = backendState.pointSpriteTexCoordMask;
- unsigned long texCoordAttrib = 0;
-
- while (_BitScanForward(&texCoordAttrib, texCoordMask))
- {
- texCoordMask &= ~(1 << texCoordAttrib);
- __m128* pTexAttrib = (__m128*)&newAttribBuffer[0] + 3 * texCoordAttrib;
- if (rastState.pointSpriteTopOrigin)
- {
- pTexAttrib[0] = _mm_set_ps(1, 0, 0, 0);
- pTexAttrib[1] = _mm_set_ps(1, 0, 1, 1);
- pTexAttrib[2] = _mm_set_ps(1, 0, 0, 1);
- }
- else
- {
- pTexAttrib[0] = _mm_set_ps(1, 0, 1, 0);
- pTexAttrib[1] = _mm_set_ps(1, 0, 0, 1);
- pTexAttrib[2] = _mm_set_ps(1, 0, 1, 1);
- }
- }
- }
-
- pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
-}
-
-void InitRasterizerFunctions()
-{
- InitRasterizerFuncs();
-}
-
-// Selector for correct templated RasterizeTriangle function
-PFN_WORK_FUNC GetRasterizerFunc(SWR_MULTISAMPLE_COUNT numSamples,
- bool IsCenter,
- bool IsConservative,
- SWR_INPUT_COVERAGE InputCoverage,
- uint32_t EdgeEnable,
- bool RasterizeScissorEdges)
-{
- SWR_ASSERT(numSamples >= 0 && numSamples < SWR_MULTISAMPLE_TYPE_COUNT);
- SWR_ASSERT(InputCoverage >= 0 && InputCoverage < SWR_INPUT_COVERAGE_COUNT);
- SWR_ASSERT(EdgeEnable < STATE_VALID_TRI_EDGE_COUNT);
-
- PFN_WORK_FUNC func = gRasterizerFuncs[numSamples][IsCenter][IsConservative][InputCoverage]
- [EdgeEnable][RasterizeScissorEdges];
- SWR_ASSERT(func);
-
- return func;
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer.h
deleted file mode 100644
index f15cc193129..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file rasterizer.h
- *
- * @brief Definitions for the rasterizer.
- *
- ******************************************************************************/
-#pragma once
-
-#include "context.h"
-#include <type_traits>
-#include "conservativeRast.h"
-#include "multisample.h"
-
-void RasterizeLine(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData);
-void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData);
-void RasterizeTriPoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData);
-void InitRasterizerFunctions();
-
-INLINE
-__m128i fpToFixedPoint(const __m128 vIn)
-{
- __m128 vFixed = _mm_mul_ps(vIn, _mm_set1_ps(FIXED_POINT_SCALE));
- return _mm_cvtps_epi32(vFixed);
-}
-
-enum TriEdgesStates
-{
- STATE_NO_VALID_EDGES = 0,
- STATE_E0_E1_VALID,
- STATE_E0_E2_VALID,
- STATE_E1_E2_VALID,
- STATE_ALL_EDGES_VALID,
- STATE_VALID_TRI_EDGE_COUNT,
-};
-
-enum TriEdgesValues
-{
- NO_VALID_EDGES = 0,
- E0_E1_VALID = 0x3,
- E0_E2_VALID = 0x5,
- E1_E2_VALID = 0x6,
- ALL_EDGES_VALID = 0x7,
- VALID_TRI_EDGE_COUNT,
-};
-
-// Selector for correct templated RasterizeTriangle function
-PFN_WORK_FUNC GetRasterizerFunc(SWR_MULTISAMPLE_COUNT numSamples,
- bool IsCenter,
- bool IsConservative,
- SWR_INPUT_COVERAGE InputCoverage,
- uint32_t EdgeEnable,
- bool RasterizeScissorEdges);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief ValidTriEdges convenience typedefs used for templated function
-/// specialization supported Fixed Point precisions
-typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> AllEdgesValidT;
-typedef std::integral_constant<uint32_t, E0_E1_VALID> E0E1ValidT;
-typedef std::integral_constant<uint32_t, E0_E2_VALID> E0E2ValidT;
-typedef std::integral_constant<uint32_t, E1_E2_VALID> E1E2ValidT;
-typedef std::integral_constant<uint32_t, NO_VALID_EDGES> NoEdgesValidT;
-
-typedef std::integral_constant<uint32_t, STATE_ALL_EDGES_VALID> StateAllEdgesValidT;
-typedef std::integral_constant<uint32_t, STATE_E0_E1_VALID> StateE0E1ValidT;
-typedef std::integral_constant<uint32_t, STATE_E0_E2_VALID> StateE0E2ValidT;
-typedef std::integral_constant<uint32_t, STATE_E1_E2_VALID> StateE1E2ValidT;
-typedef std::integral_constant<uint32_t, STATE_NO_VALID_EDGES> StateNoEdgesValidT;
-
-// some specializations to convert from edge state to edge bitmask values
-template <typename EdgeMask>
-struct EdgeMaskVal
-{
- static_assert(EdgeMask::value > STATE_ALL_EDGES_VALID,
- "Primary EdgeMaskVal shouldn't be instantiated");
-};
-
-template <>
-struct EdgeMaskVal<StateAllEdgesValidT>
-{
- typedef AllEdgesValidT T;
-};
-
-template <>
-struct EdgeMaskVal<StateE0E1ValidT>
-{
- typedef E0E1ValidT T;
-};
-
-template <>
-struct EdgeMaskVal<StateE0E2ValidT>
-{
- typedef E0E2ValidT T;
-};
-
-template <>
-struct EdgeMaskVal<StateE1E2ValidT>
-{
- typedef E1E2ValidT T;
-};
-
-template <>
-struct EdgeMaskVal<StateNoEdgesValidT>
-{
- typedef NoEdgesValidT T;
-};
-
-INLINE uint32_t EdgeValToEdgeState(uint32_t val)
-{
- SWR_ASSERT(val < VALID_TRI_EDGE_COUNT, "Unexpected tri edge mask");
- static const uint32_t edgeValToEdgeState[VALID_TRI_EDGE_COUNT] = {0, 0, 0, 1, 0, 2, 3, 4};
- return edgeValToEdgeState[val];
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @struct RasterScissorEdgesT
-/// @brief Primary RasterScissorEdgesT templated struct that holds compile
-/// time information about the number of edges needed to be rasterized,
-/// If either the scissor rect or conservative rast is enabled,
-/// the scissor test is enabled and the rasterizer will test
-/// 3 triangle edges + 4 scissor edges for coverage.
-/// @tparam RasterScissorEdgesT: number of multisamples
-/// @tparam ConservativeT: is this a conservative rasterization
-/// @tparam EdgeMaskT: Which edges are valid(not degenerate)
-template <typename RasterScissorEdgesT, typename ConservativeT, typename EdgeMaskT>
-struct RasterEdgeTraits
-{
- typedef std::true_type RasterizeScissorEdgesT;
- typedef std::integral_constant<uint32_t, 7> NumEdgesT;
- // typedef std::integral_constant<uint32_t, EdgeMaskT::value> ValidEdgeMaskT;
- typedef typename EdgeMaskVal<EdgeMaskT>::T ValidEdgeMaskT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief specialization of RasterEdgeTraits. If neither scissor rect
-/// nor conservative rast is enabled, only test 3 triangle edges
-/// for coverage
-template <typename EdgeMaskT>
-struct RasterEdgeTraits<std::false_type, std::false_type, EdgeMaskT>
-{
- typedef std::false_type RasterizeScissorEdgesT;
- typedef std::integral_constant<uint32_t, 3> NumEdgesT;
- // no need for degenerate edge masking in non-conservative case; rasterize all triangle edges
- typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> ValidEdgeMaskT;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @struct RasterizerTraits
-/// @brief templated struct that holds compile time information used
-/// during rasterization. Inherits EdgeTraits and ConservativeRastBETraits.
-/// @tparam NumSamplesT: number of multisamples
-/// @tparam ConservativeT: is this a conservative rasterization
-/// @tparam InputCoverageT: what type of input coverage is the PS expecting?
-/// (only used with conservative rasterization)
-/// @tparam RasterScissorEdgesT: do we need to rasterize with a scissor?
-template <typename NumSamplesT,
- typename CenterPatternT,
- typename ConservativeT,
- typename InputCoverageT,
- typename EdgeEnableT,
- typename RasterScissorEdgesT>
-struct _RasterizerTraits : public ConservativeRastBETraits<ConservativeT, InputCoverageT>,
- public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, EdgeEnableT>
-{
- typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value),
- CenterPatternT::value>
- MT;
-
- /// Fixed point precision the rasterizer is using
- typedef FixedPointTraits<Fixed_16_8> PrecisionT;
- /// Fixed point precision of the edge tests used during rasterization
- typedef FixedPointTraits<Fixed_X_16> EdgePrecisionT;
-
- // If conservative rast or MSAA center pattern is enabled, only need a single sample coverage
- // test, with the result copied to all samples
- typedef std::integral_constant<int, ConservativeT::value ? 1 : MT::numCoverageSamples>
- NumCoverageSamplesT;
-
- static_assert(
- EdgePrecisionT::BitsT::value >=
- ConservativeRastBETraits<ConservativeT,
- InputCoverageT>::ConservativePrecisionT::BitsT::value,
- "Rasterizer edge fixed point precision < required conservative rast precision");
-
- /// constants used to offset between different types of raster tiles
- static const int colorRasterTileStep{
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8)) *
- MT::numSamples};
- static const int depthRasterTileStep{
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8)) *
- MT::numSamples};
- static const int stencilRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM *
- (FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8)) *
- MT::numSamples};
- static const int colorRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) *
- colorRasterTileStep};
- static const int depthRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) *
- depthRasterTileStep};
- static const int stencilRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) *
- stencilRasterTileStep};
-};
-
-template <uint32_t NumSamplesT,
- uint32_t CenterPatternT,
- uint32_t ConservativeT,
- uint32_t InputCoverageT,
- uint32_t EdgeEnableT,
- uint32_t RasterScissorEdgesT>
-struct RasterizerTraits final
- : public _RasterizerTraits<std::integral_constant<uint32_t, NumSamplesT>,
- std::integral_constant<bool, CenterPatternT != 0>,
- std::integral_constant<bool, ConservativeT != 0>,
- std::integral_constant<uint32_t, InputCoverageT>,
- std::integral_constant<uint32_t, EdgeEnableT>,
- std::integral_constant<bool, RasterScissorEdgesT != 0>>
-{
-};
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h
deleted file mode 100644
index 2153fe653b1..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h
+++ /dev/null
@@ -1,1542 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file rasterizer_impl.h
- *
- * @brief Implementation for the rasterizer.
- *
- ******************************************************************************/
-
-#include <algorithm>
-#include <cstring>
-#include <vector>
-
-#include "rasterizer.h"
-#include "rdtsc_core.h"
-#include "backend.h"
-#include "utils.h"
-#include "frontend.h"
-#include "tilemgr.h"
-#include "memory/tilingtraits.h"
-
-extern PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT]
- [STATE_VALID_TRI_EDGE_COUNT][2]; // defined elsewhere; NOTE(review): the six dimensions presumably mirror the six RasterizerTraits template parameters - confirm
-
-template <uint32_t numSamples = 1> // forward declaration only; numSamples defaults to 1
-void GetRenderHotTiles(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t macroID,
- uint32_t x,
- uint32_t y,
- RenderOutputBuffers& renderBuffers,
- uint32_t renderTargetArrayIndex);
-template <typename RT> // forward declaration only
-void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers& buffers);
-template <typename RT> // forward declaration only
-void StepRasterTileY(uint32_t colorHotTileMask,
- RenderOutputBuffers& buffers,
- RenderOutputBuffers& startBufferRow);
-
-#define MASKTOVEC(i3, i2, i1, i0) \
- { \
- -i0, -i1, -i2, -i3 \
- } // brace initializer: each argument is negated (1 -> -1, 0 stays 0) and i0 becomes the first element
-static const __m256d gMaskToVecpd[] = { // 16 entries, one per 4-bit mask value; indexed with (msk | msk2) in adjustTopLeftRuleIntFix16
- MASKTOVEC(0, 0, 0, 0),
- MASKTOVEC(0, 0, 0, 1),
- MASKTOVEC(0, 0, 1, 0),
- MASKTOVEC(0, 0, 1, 1),
- MASKTOVEC(0, 1, 0, 0),
- MASKTOVEC(0, 1, 0, 1),
- MASKTOVEC(0, 1, 1, 0),
- MASKTOVEC(0, 1, 1, 1),
- MASKTOVEC(1, 0, 0, 0),
- MASKTOVEC(1, 0, 0, 1),
- MASKTOVEC(1, 0, 1, 0),
- MASKTOVEC(1, 0, 1, 1),
- MASKTOVEC(1, 1, 0, 0),
- MASKTOVEC(1, 1, 0, 1),
- MASKTOVEC(1, 1, 1, 0),
- MASKTOVEC(1, 1, 1, 1),
-};
-
-struct POS // integer 2D point; the scissor code below builds its rectangle corners from these
-{
- int32_t x, y;
-};
-
-struct EDGE // per-edge constants; every field is written by ComputeEdgeData(int32_t, int32_t, EDGE&)
-{
- double a, b; // a, b edge coefficients in fix8
- double stepQuadX; // step to adjacent horizontal quad in fix16 (a * 2 * FIXED_POINT_SCALE)
- double stepQuadY; // step to adjacent vertical quad in fix16 (b * 2 * FIXED_POINT_SCALE)
- double stepRasterTileX; // step to adjacent horizontal raster tile in fix16 (a * KNOB_TILE_X_DIM * FIXED_POINT_SCALE)
- double stepRasterTileY; // step to adjacent vertical raster tile in fix16 (b * KNOB_TILE_Y_DIM * FIXED_POINT_SCALE)
-
- __m256d vQuadOffsets; // offsets for 4 samples of a quad
- __m256d vRasterTileOffsets; // offsets for the 4 corners of a raster tile
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief rasterize a raster tile partially covered by the triangle
-/// @param pDC - draw context; not referenced inside this function
-/// @param startEdges - one scalar start value per edge; pRastEdges[e].vQuadOffsets is added to it
-/// to get the 4 sample values of the first 2x2 quad (edge equation form: Ax + By + C)
-/// @param pRastEdges - per-edge constants; stepQuadX / stepQuadY move the 4 values to the next quad
-/// @return coverage mask, 4 bits per quad; a bit is set when every tested edge value has its sign bit set
-template <uint32_t NumEdges, typename EdgeMaskT>
-INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT* pDC,
- double startEdges[NumEdges],
- EDGE* pRastEdges)
-{
- uint64_t coverageMask = 0;
-
- __m256d vEdges[NumEdges];
- __m256d vStepX[NumEdges];
- __m256d vStepY[NumEdges];
-
- for (uint32_t e = 0; e < NumEdges; ++e)
- {
- // Step to the pixel sample locations of the 1st quad
- vEdges[e] = _mm256_add_pd(_mm256_set1_pd(startEdges[e]), pRastEdges[e].vQuadOffsets);
-
- // compute step to next quad (mul by 2 in x and y direction)
- vStepX[e] = _mm256_set1_pd(pRastEdges[e].stepQuadX);
- vStepY[e] = _mm256_set1_pd(pRastEdges[e].stepQuadY);
- }
-
- // fast unrolled version for 8x8 tile
-#if KNOB_TILE_X_DIM == 8 && KNOB_TILE_Y_DIM == 8
- int edgeMask[NumEdges];
- uint64_t mask;
-
- auto eval_lambda = [&](int e) { edgeMask[e] = _mm256_movemask_pd(vEdges[e]); };
- auto update_lambda = [&](int e) { mask &= edgeMask[e]; };
- auto incx_lambda = [&](int e) { vEdges[e] = _mm256_add_pd(vEdges[e], vStepX[e]); };
- auto incy_lambda = [&](int e) { vEdges[e] = _mm256_add_pd(vEdges[e], vStepY[e]); };
- auto decx_lambda = [&](int e) { vEdges[e] = _mm256_sub_pd(vEdges[e], vStepX[e]); };
-
-// evaluate which pixels in the quad are covered
-#define EVAL UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(eval_lambda);
-
- // update coverage mask
- // if edge 0 is degenerate and will be skipped; init the mask
-#define UPDATE_MASK(bit) \
- if (std::is_same<EdgeMaskT, E1E2ValidT>::value || \
- std::is_same<EdgeMaskT, NoEdgesValidT>::value) \
- { \
- mask = 0xf; \
- } \
- else \
- { \
- mask = edgeMask[0]; \
- } \
- UnrollerLMask<1, NumEdges, 1, EdgeMaskT::value>::step(update_lambda); \
- coverageMask |= (mask << bit);
-
- // step in the +x direction to the next quad
-#define INCX UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incx_lambda);
-
- // step in the +y direction to the next quad
-#define INCY UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incy_lambda);
-
- // step in the -x direction to the next quad
-#define DECX UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(decx_lambda);
-
- // sweep 2x2 quad back and forth through the raster tile,
- // computing coverage masks for the entire tile
-
- // raster tile
- // 0 1 2 3 4 5 6 7
- // x x
- // x x ------------------>
- // x x |
- // <-----------------x x V
- // ..
-
- // row 0
- EVAL;
- UPDATE_MASK(0);
- INCX;
- EVAL;
- UPDATE_MASK(4);
- INCX;
- EVAL;
- UPDATE_MASK(8);
- INCX;
- EVAL;
- UPDATE_MASK(12);
- INCY;
-
- // row 1
- EVAL;
- UPDATE_MASK(28);
- DECX;
- EVAL;
- UPDATE_MASK(24);
- DECX;
- EVAL;
- UPDATE_MASK(20);
- DECX;
- EVAL;
- UPDATE_MASK(16);
- INCY;
-
- // row 2
- EVAL;
- UPDATE_MASK(32);
- INCX;
- EVAL;
- UPDATE_MASK(36);
- INCX;
- EVAL;
- UPDATE_MASK(40);
- INCX;
- EVAL;
- UPDATE_MASK(44);
- INCY;
-
- // row 3
- EVAL;
- UPDATE_MASK(60);
- DECX;
- EVAL;
- UPDATE_MASK(56);
- DECX;
- EVAL;
- UPDATE_MASK(52);
- DECX;
- EVAL;
- UPDATE_MASK(48);
-#else
- uint32_t bit = 0;
- for (uint32_t y = 0; y < KNOB_TILE_Y_DIM / 2; ++y)
- {
- __m256d vStartOfRowEdge[NumEdges];
- for (uint32_t e = 0; e < NumEdges; ++e)
- {
- vStartOfRowEdge[e] = vEdges[e];
- }
-
- for (uint32_t x = 0; x < KNOB_TILE_X_DIM / 2; ++x)
- {
- int edgeMask[NumEdges];
- for (uint32_t e = 0; e < NumEdges; ++e)
- {
- edgeMask[e] = _mm256_movemask_pd(vEdges[e]);
- }
-
- uint64_t mask = edgeMask[0]; // NOTE(review): unlike the 8x8 path, EdgeMaskT is not consulted in this generic path - confirm intended
- for (uint32_t e = 1; e < NumEdges; ++e)
- {
- mask &= edgeMask[e];
- }
- coverageMask |= (mask << bit);
-
- // step to the next quad in the x
- for (uint32_t e = 0; e < NumEdges; ++e)
- {
- vEdges[e] = _mm256_add_pd(vEdges[e], vStepX[e]);
- }
- bit += 4;
- }
-
- // step to the next row
- for (uint32_t e = 0; e < NumEdges; ++e)
- {
- vEdges[e] = _mm256_add_pd(vStartOfRowEdge[e], vStepY[e]);
- }
- }
-#endif
- return coverageMask;
-}
-// Top-left rule adjustment.
-// The four 32-bit lanes of vA / vB are paired with the four double lanes of vEdge. A lane of
-// vEdge is lowered by exactly 1.0 when its A coefficient is negative, or when its A coefficient
-// is zero and its B coefficient is negative; every other lane is left untouched.
-INLINE void adjustTopLeftRuleIntFix16(const __m128i vA, const __m128i vB, __m256d& vEdge)
-{
-    // bit i set <=> A[i] < 0 (movemask gathers the sign bit of each 32-bit lane)
-    const int negAMask = _mm_movemask_ps(_mm_castsi128_ps(vA));
-
-    // bit i set <=> A[i] == 0 && B[i] < 0
-    const __m128i vAIsZero  = _mm_cmpeq_epi32(vA, _mm_setzero_si128());
-    const int     zeroAMask = _mm_movemask_ps(_mm_castsi128_ps(vAIsZero));
-    const int     negBMask  = _mm_movemask_ps(_mm_castsi128_ps(vB));
-    const int     tieMask   = zeroAMask & negBMask;
-
-    // value each selected lane will take
-    const __m256d vLowered = _mm256_sub_pd(vEdge, _mm256_set1_pd(1.0));
-
-    // gMaskToVecpd turns the 4-bit lane mask into a per-lane blend selector
-    vEdge = _mm256_blendv_pd(vEdge, vLowered, gMaskToVecpd[negAMask | tieMask]);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Number of fractional bits by which a manh product (PrecisionT bits +
-///        ConservativePrecisionT bits) exceeds the edge precision (EdgePrecisionT bits).
-///        All inputs are compile time trait values; a negative result is rejected at
-///        compile time.
-template <typename RT>
-constexpr int64_t ManhToEdgePrecisionAdjust()
-{
-    // fractional bits carried by the product of the two fixed point operands
-    using ManhBitsT =
-        std::integral_constant<int64_t,
-                               RT::PrecisionT::BitsT::value +
-                                   RT::ConservativePrecisionT::BitsT::value>;
-
-    static_assert(ManhBitsT::value >= RT::EdgePrecisionT::BitsT::value,
-                  "Inadequate precision of result of manh calculation ");
-
-    return ManhBitsT::value - RT::EdgePrecisionT::BitsT::value;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @struct adjustEdgeConservative
-/// @brief Primary template of the adjustEdgeConservative helper. Constructing a temporary
-///        of this type performs the adjustment on vEdge; a partial specialization for a
-///        zero offset turns the call into a no-op.
-/// @tparam RT: rasterizer traits
-/// @tparam ConservativeEdgeOffsetT: integral constant holding the fixed point edge offset
-template <typename RT, typename ConservativeEdgeOffsetT>
-struct adjustEdgeConservative
-{
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Moves each evaluated edge away from the pixel center by 1/2 pixel +
-    ///        uncertainty region in both the x and y direction.
-    ///
-    /// Uncertainty regions arise from fixed point rounding, which
-    /// can snap a vertex +/- by min fixed point value.
-    /// Adding 1/2 pixel in x/y bumps the edge equation tests out towards the pixel corners.
-    /// This allows the rasterizer to test for coverage only at the pixel center,
-    /// instead of having to test individual pixel corners for conservative coverage
-    ///
-    /// @param vAi - A coefficients, one per 32-bit lane
-    /// @param vBi - B coefficients, one per 32-bit lane
-    /// @param vEdge - evaluated edge values; lane i is lowered using lane i of vAi / vBi
-    INLINE adjustEdgeConservative(const __m128i& vAi, const __m128i& vBi, __m256d& vEdge)
-    {
-        // Assumes CCW winding order. Subtracting from the evaluated edge equation moves the
-        // edge away from the pixel center (in the direction of the edge normal A/B)
-
-        // edge = Ax + By + C - manh * offset, rescaled to edge precision
-        // manh = manhattan distance = abs(A) + abs(B)
-
-        // 'fixed point' multiply (in double to be avx1 friendly)
-        // need doubles to hold result of a fixed multiply: 16.8 * 16.9 = 32.17, for example
-        const __m256d vAbsA   = _mm256_cvtepi32_pd(_mm_abs_epi32(vAi));
-        const __m256d vAbsB   = _mm256_cvtepi32_pd(_mm_abs_epi32(vBi));
-        const __m256d vOffset = _mm256_set1_pd(ConservativeEdgeOffsetT::value);
-        __m256d       manh =
-            _mm256_add_pd(_mm256_mul_pd(vAbsA, vOffset), _mm256_mul_pd(vAbsB, vOffset));
-
-        static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >=
-                          RT::EdgePrecisionT::BitsT::value,
-                      "Inadequate precision of result of manh calculation ");
-
-        // The product carries ManhToEdgePrecisionAdjust<RT>() more fractional bits than the
-        // incoming edge values. The math is done in doubles, so instead of shifting right by
-        // N bits the value is scaled by 2^-N. (Scaling by N * 0.5 only matches a shift when
-        // N == 1 and wipes out the offset entirely when N == 0.)
-        const double manhToEdgeScale =
-            1.0 / static_cast<double>(int64_t{1} << ManhToEdgePrecisionAdjust<RT>());
-        manh = _mm256_mul_pd(manh, _mm256_set1_pd(manhToEdgeScale));
-
-        // move the edge away from the pixel center by the required conservative precision +
-        // 1/2 pixel; this allows the rasterizer to do a single conservative coverage test to
-        // see if the primitive intersects the pixel at all
-        vEdge = _mm256_sub_pd(vEdge, manh);
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief adjustEdgeConservative partial specialization for an edge offset of 0:
-///        the constructor accepts the same arguments and leaves the edge untouched
-template <typename RT>
-struct adjustEdgeConservative<RT, std::integral_constant<int32_t, 0>>
-{
-    INLINE adjustEdgeConservative(const __m128i&, const __m128i&, __m256d&) {}
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Compile time distance by which a scissor edge is moved for conservative
-///        rasterization: the conservative edge offset, reduced by one unit of
-///        rasterizer precision when not all triangle edges are valid
-template <typename RT>
-constexpr int64_t ConservativeScissorOffset()
-{
-    static_assert(RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value >= 0,
-                  "Rasterizer precision > conservative precision");
-
-    // A degenerate triangle (anything other than ALL_EDGES_VALID) has had its BBox adjusted,
-    // which is compensated here: 1 for degenerate, 0 otherwise, shifted up to conservative
-    // precision and subtracted from the 1/2 pixel + conservative offset.
-    return RT::ConservativeEdgeOffsetT::value -
-           (static_cast<int32_t>((RT::ValidEdgeMaskT::value == ALL_EDGES_VALID) ? 0 : 1)
-            << (RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value));
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Lowers every lane of a vector of evaluated scissor edge values by the same
-///        amount: (|a| + |b|) scaled by ConservativeScissorOffset<RT>() and shifted
-///        down by ManhToEdgePrecisionAdjust<RT>() bits.
-/// @param a, b - edge coefficients; truncated to integers before use
-/// @param vEdge - evaluated edge values, updated in place
-template <typename RT>
-INLINE void adjustScissorEdge(const double a, const double b, __m256d& vEdge)
-{
-    const int64_t offset = ConservativeScissorOffset<RT>();
-    const int64_t absA   = std::abs(static_cast<int64_t>(a));
-    const int64_t absB   = std::abs(static_cast<int64_t>(b));
-
-    const int64_t manh = ((absA * offset) + (absB * offset)) >> ManhToEdgePrecisionAdjust<RT>();
-
-    vEdge = _mm256_sub_pd(vEdge, _mm256_set1_pd(static_cast<double>(manh)));
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Scalar counterpart of the edge adjustment: returns Edge lowered by
-///        (|a| + |b|) scaled by OffsetT::value and shifted down by
-///        ManhToEdgePrecisionAdjust<RT>() bits.
-/// @param a, b - edge coefficients; truncated to integers before use
-/// @param Edge - evaluated edge value
-template <typename RT, typename OffsetT>
-INLINE double adjustScalarEdge(const double a, const double b, const double Edge)
-{
-    const int64_t offset = OffsetT::value;
-    const int64_t absA   = std::abs(static_cast<int64_t>(a));
-    const int64_t absB   = std::abs(static_cast<int64_t>(b));
-
-    const int64_t manh = ((absA * offset) + (absB * offset)) >> ManhToEdgePrecisionAdjust<RT>();
-
-    return Edge - static_cast<double>(manh);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Apply the conservative edge offset (EdgeOffsetT), then the top-left rule, to vEdge in place
-template <typename RT, typename EdgeOffsetT>
-struct adjustEdgesFix16
-{
- INLINE adjustEdgesFix16(const __m128i& vAi, const __m128i& vBi, __m256d& vEdge)
- {
- static_assert(
- std::is_same<typename RT::EdgePrecisionT, FixedPointTraits<Fixed_X_16>>::value,
- "Edge equation expected to be in x.16 fixed point");
-
- static_assert(RT::IsConservativeT::value,
- "Edge offset assumes conservative rasterization is enabled");
-
- // need to apply any edge offsets before applying the top-left rule
- adjustEdgeConservative<RT, EdgeOffsetT>(vAi, vBi, vEdge); // constructing the temporary performs the adjustment
-
- adjustTopLeftRuleIntFix16(vAi, vBi, vEdge);
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Specialization for an edge offset of 0: only the top-left rule is applied to vEdge
-template <typename RT>
-struct adjustEdgesFix16<RT, std::integral_constant<int32_t, 0>>
-{
- INLINE adjustEdgesFix16(const __m128i& vAi, const __m128i& vBi, __m256d& vEdge)
- {
- adjustTopLeftRuleIntFix16(vAi, vBi, vEdge);
- }
-};
-
-// Returns max(abs(dz/dx), abs(dz/dy)) for the triangle described by pDesc.
-//
-// The reference derivation left by the original author evaluates the barycentrics i and j at
-// (0,0), (1,0) and (0,1), interpolates depth at those three points and takes the absolute
-// differences; the two expressions below are the optimized form of that computation.
-INLINE float ComputeMaxDepthSlope(const SWR_TRIANGLE_DESC* pDesc)
-{
-    const float rawSlopeX =
-        pDesc->recipDet * (pDesc->Z[0] * pDesc->I[0] + pDesc->Z[1] * pDesc->J[0]);
-    const float rawSlopeY =
-        pDesc->recipDet * (pDesc->Z[0] * pDesc->I[1] + pDesc->Z[1] * pDesc->J[1]);
-
-    const float slopeX = fabsf(rawSlopeX);
-    const float slopeY = fabsf(rawSlopeY);
-
-    return (slopeX < slopeY) ? slopeY : slopeX;
-}
-
-/// @brief Returns the factor that scales the constant depth bias for the bound depth format.
-/// @param pState - raster state; only depthFormat is read
-/// @param pDesc - triangle descriptor; not referenced, kept so the signature is unchanged
-/// @param z - three depth values; only read for the R32_FLOAT path
-/// @return 2^-24 for R24_UNORM_X8_TYPELESS, 2^-16 for R16_UNORM, otherwise
-///         2^(exponent(max(abs(z))) - 23)
-INLINE float
-ComputeBiasFactor(const SWR_RASTSTATE* pState, const SWR_TRIANGLE_DESC* pDesc, const float* z)
-{
-    if (pState->depthFormat == R24_UNORM_X8_TYPELESS)
-    {
-        return (1.0f / (1 << 24));
-    }
-    else if (pState->depthFormat == R16_UNORM)
-    {
-        return (1.0f / (1 << 16));
-    }
-    else
-    {
-        SWR_ASSERT(pState->depthFormat == R32_FLOAT);
-
-        // for f32 depth, factor = 2^(exponent(max(abs(z))) - 23)
-        float zMax = std::max(fabsf(z[0]), std::max(fabsf(z[1]), fabsf(z[2])));
-
-        // Keep only the exponent bits so zMax becomes the power of two at or below it.
-        // The bit pattern is moved with memcpy: reading a float through a uint32_t* (and
-        // back) violates the strict aliasing rule and is undefined behavior.
-        uint32_t zMaxBits;
-        std::memcpy(&zMaxBits, &zMax, sizeof(zMaxBits));
-        zMaxBits &= 0x7f800000;
-        std::memcpy(&zMax, &zMaxBits, sizeof(zMax));
-
-        return zMax * (1.0f / (1 << 23));
-    }
-}
-
-/// @brief Computes the depth bias for one triangle from the raster state.
-/// @param pState - raster state; reads depthBias, slopeScaledDepthBias, depthBiasPreAdjusted
-///                 and depthBiasClamp
-/// @param pTri - triangle descriptor, forwarded to the slope / factor helpers
-/// @param z - depth values, forwarded to ComputeBiasFactor
-/// @return 0 when both bias terms are zero; otherwise constant term + slope term, clamped
-///         from above by a positive depthBiasClamp or from below by a negative one
-INLINE float
-ComputeDepthBias(const SWR_RASTSTATE* pState, const SWR_TRIANGLE_DESC* pTri, const float* z)
-{
-    // nothing to do when neither bias term is active
-    if (pState->depthBias == 0 && pState->slopeScaledDepthBias == 0)
-    {
-        return 0.0f;
-    }
-
-    // slope term: only query the triangle slope when the scale is nonzero
-    float slopeTerm = pState->slopeScaledDepthBias;
-    if (slopeTerm != 0.0f)
-    {
-        slopeTerm *= ComputeMaxDepthSlope(pTri);
-    }
-
-    // constant term: scaled by the format factor unless the state says it already was
-    float result = pState->depthBias;
-    if (!pState->depthBiasPreAdjusted)
-    {
-        result *= ComputeBiasFactor(pState, pTri, z);
-    }
-    result += slopeTerm;
-
-    // a clamp of exactly 0 leaves the bias unclamped
-    const float clamp = pState->depthBiasClamp;
-    if (clamp > 0.0f)
-    {
-        result = std::min(result, clamp);
-    }
-    else if (clamp < 0.0f)
-    {
-        result = std::max(result, clamp);
-    }
-
-    return result;
-}
-
-// Prevent dead-code elimination (DCE) by writing coverage mask from rasterizer to volatile
-#if KNOB_ENABLE_TOSS_POINTS
-__declspec(thread) volatile uint64_t gToss; // per-thread sink; only declared when KNOB_ENABLE_TOSS_POINTS is nonzero
-#endif
-
-static const uint32_t vertsPerTri = 3, componentsPerAttrib = 4; // together with SWR_VTX_NUM_SLOTS these size the buffer below
-// try to avoid _chkstk insertions; make this thread local
-static THREAD
-OSALIGNLINE(float) perspAttribsTLS[vertsPerTri * SWR_VTX_NUM_SLOTS * componentsPerAttrib];
-
-/// @brief Fills every field of edge from the integer edge coefficients a and b.
-/// @param a, b - edge coefficients
-/// @param edge - receives the coefficients, the scalar quad / raster tile steps and the
-///               per-lane quad / raster tile offsets
-INLINE
-void ComputeEdgeData(int32_t a, int32_t b, EDGE& edge)
-{
-    const int64_t a64 = (int64_t)a;
-    const int64_t b64 = (int64_t)b;
-
-    edge.a = a;
-    edge.b = b;
-
-    // constant steps to the adjacent quad (2 pixels away)
-    edge.stepQuadX = (double)(a64 * (int64_t)(2 * FIXED_POINT_SCALE));
-    edge.stepQuadY = (double)(b64 * (int64_t)(2 * FIXED_POINT_SCALE));
-
-    // constant steps to the adjacent raster tile
-    edge.stepRasterTileX = (double)(a64 * (int64_t)(KNOB_TILE_X_DIM * FIXED_POINT_SCALE));
-    edge.stepRasterTileY = (double)(b64 * (int64_t)(KNOB_TILE_Y_DIM * FIXED_POINT_SCALE));
-
-    const __m256d vA = _mm256_set1_pd(edge.a);
-    const __m256d vB = _mm256_set1_pd(edge.b);
-
-    // quad offsets: a * x + b * y for the 4 samples of a 2x2 quad
-    const __m256d vQuadX = _mm256_set_pd(FIXED_POINT_SCALE, 0, FIXED_POINT_SCALE, 0);
-    const __m256d vQuadY = _mm256_set_pd(FIXED_POINT_SCALE, FIXED_POINT_SCALE, 0, 0);
-    edge.vQuadOffsets    = _mm256_add_pd(_mm256_mul_pd(vA, vQuadX), _mm256_mul_pd(vB, vQuadY));
-
-    // raster tile offsets: a * x + b * y for the 4 corners of a raster tile
-    const __m256d vTileX = _mm256_set_pd(
-        (KNOB_TILE_X_DIM - 1) * FIXED_POINT_SCALE, 0, (KNOB_TILE_X_DIM - 1) * FIXED_POINT_SCALE, 0);
-    const __m256d vTileY = _mm256_set_pd(
-        (KNOB_TILE_Y_DIM - 1) * FIXED_POINT_SCALE, (KNOB_TILE_Y_DIM - 1) * FIXED_POINT_SCALE, 0, 0);
-    edge.vRasterTileOffsets =
-        _mm256_add_pd(_mm256_mul_pd(vA, vTileX), _mm256_mul_pd(vB, vTileY));
-}
-
-/// @brief Builds the edge running from p0 to p1: a = p0.y - p1.y, b = p1.x - p0.x.
-INLINE
-void ComputeEdgeData(const POS& p0, const POS& p1, EDGE& edge)
-{
-    const int32_t a = p0.y - p1.y;
-    const int32_t b = p1.x - p0.x;
-    ComputeEdgeData(a, b, edge);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Primary template of UpdateEdgeMasks. Adds the per-edge tile bounding box
-///        offsets to the evaluated edges and reports the sign bits of the results.
-/// @tparam NumSamplesT: multisample count (a specialization exists for SingleSampleT)
-/// @param vEdgeTileBbox - per-edge offsets, added lane by lane
-/// @param vEdgeFix16 - evaluated edge values; entries 0..2 are read
-/// @param mask0, mask1, mask2 - receive the 4-bit sign mask of edge 0, 1 and 2
-template <typename NumSamplesT>
-INLINE void UpdateEdgeMasks(const __m256d (&vEdgeTileBbox)[3],
-                            const __m256d* vEdgeFix16,
-                            int32_t&       mask0,
-                            int32_t&       mask1,
-                            int32_t&       mask2)
-{
-    // evaluate edge equations at the tile multisample bounding box and keep the sign bits
-    mask0 = _mm256_movemask_pd(_mm256_add_pd(vEdgeTileBbox[0], vEdgeFix16[0]));
-    mask1 = _mm256_movemask_pd(_mm256_add_pd(vEdgeTileBbox[1], vEdgeFix16[1]));
-    mask2 = _mm256_movemask_pd(_mm256_add_pd(vEdgeTileBbox[2], vEdgeFix16[2]));
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief UpdateEdgeMasks<SingleSampleT> specialization: the bounding box offsets are
-/// ignored and each mask is the 4-bit sign mask of the evaluated edge itself
-template <>
-INLINE void UpdateEdgeMasks<SingleSampleT>(
- const __m256d (&)[3], const __m256d* vEdgeFix16, int32_t& mask0, int32_t& mask1, int32_t& mask2)
-{
- mask0 = _mm256_movemask_pd(vEdgeFix16[0]);
- mask1 = _mm256_movemask_pd(vEdgeFix16[1]);
- mask2 = _mm256_movemask_pd(vEdgeFix16[2]);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @struct ComputeScissorEdges
-/// @brief Primary template definition. Allows the helper to be constructed generically;
-/// this version has an empty constructor body, while the partial specializations
-/// for RasterScissorEdgesT == std::true_type fill in edges 3..6
-/// @tparam RasterScissorEdgesT: is scissor enabled?
-/// @tparam IsConservativeT: is conservative rast enabled?
-/// @tparam RT: rasterizer traits
-template <typename RasterScissorEdgesT, typename IsConservativeT, typename RT>
-struct ComputeScissorEdges
-{
- INLINE ComputeScissorEdges(const SWR_RECT& triBBox,
- const SWR_RECT& scissorBBox,
- const int32_t x,
- const int32_t y,
- EDGE (&rastEdges)[RT::NumEdgesT::value],
- __m256d (&vEdgeFix16)[7]){};
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief ComputeScissorEdges<std::true_type, std::true_type, RT> partial
-/// specialization. Instantiated when conservative rast and scissor are enabled
-template <typename RT>
-struct ComputeScissorEdges<std::true_type, std::true_type, RT>
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Intersect tri bbox with scissor, compute scissor edge vectors (slots 3..6),
- /// evaluate edge equations at (x, y) and offset them away from pixel center.
- INLINE ComputeScissorEdges(const SWR_RECT& triBBox,
- const SWR_RECT& scissorBBox,
- const int32_t x,
- const int32_t y,
- EDGE (&rastEdges)[RT::NumEdgesT::value], // NOTE(review): slots 3..6 are written, so NumEdgesT is presumably 7 here - confirm
- __m256d (&vEdgeFix16)[7])
- {
- // if conservative rasterizing, triangle bbox intersected with scissor bbox is used
- SWR_RECT scissor;
- scissor.xmin = std::max(triBBox.xmin, scissorBBox.xmin);
- scissor.xmax = std::min(triBBox.xmax, scissorBBox.xmax);
- scissor.ymin = std::max(triBBox.ymin, scissorBBox.ymin);
- scissor.ymax = std::min(triBBox.ymax, scissorBBox.ymax);
-
- POS topLeft{scissor.xmin, scissor.ymin};
- POS bottomLeft{scissor.xmin, scissor.ymax};
- POS topRight{scissor.xmax, scissor.ymin};
- POS bottomRight{scissor.xmax, scissor.ymax};
-
- // construct 4 scissor edges in ccw direction
- ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]);
- ComputeEdgeData(bottomLeft, bottomRight, rastEdges[4]);
- ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
- ComputeEdgeData(topRight, topLeft, rastEdges[6]);
-
- vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) +
- (rastEdges[3].b * (y - scissor.ymin)));
- vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) +
- (rastEdges[4].b * (y - scissor.ymax)));
- vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) +
- (rastEdges[5].b * (y - scissor.ymax)));
- vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) +
- (rastEdges[6].b * (y - scissor.ymin)));
-
- // conservative rasterizing: bump the scissor edges out by the conservative
- // uncertainty distance
- adjustScissorEdge<RT>(rastEdges[3].a, rastEdges[3].b, vEdgeFix16[3]);
- adjustScissorEdge<RT>(rastEdges[4].a, rastEdges[4].b, vEdgeFix16[4]);
- adjustScissorEdge<RT>(rastEdges[5].a, rastEdges[5].b, vEdgeFix16[5]);
- adjustScissorEdge<RT>(rastEdges[6].a, rastEdges[6].b, vEdgeFix16[6]);
-
- // Upper left rule for scissor: edge 3 (topLeft->bottomLeft) and edge 6 (topRight->topLeft) are lowered by 1
- vEdgeFix16[3] = _mm256_sub_pd(vEdgeFix16[3], _mm256_set1_pd(1.0));
- vEdgeFix16[6] = _mm256_sub_pd(vEdgeFix16[6], _mm256_set1_pd(1.0));
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief ComputeScissorEdges<std::true_type, std::false_type, RT> partial
-/// specialization. Instantiated when scissor is enabled and conservative rast
-/// is disabled. The triangle bbox argument is ignored and no conservative offset is applied.
-template <typename RT>
-struct ComputeScissorEdges<std::true_type, std::false_type, RT>
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Compute scissor edge vectors (slots 3..6) and evaluate edge equations at (x, y)
- INLINE ComputeScissorEdges(const SWR_RECT&,
- const SWR_RECT& scissorBBox,
- const int32_t x,
- const int32_t y,
- EDGE (&rastEdges)[RT::NumEdgesT::value], // NOTE(review): slots 3..6 are written, so NumEdgesT is presumably 7 here - confirm
- __m256d (&vEdgeFix16)[7])
- {
- const SWR_RECT& scissor = scissorBBox;
- POS topLeft{scissor.xmin, scissor.ymin};
- POS bottomLeft{scissor.xmin, scissor.ymax};
- POS topRight{scissor.xmax, scissor.ymin};
- POS bottomRight{scissor.xmax, scissor.ymax};
-
- // construct 4 scissor edges in ccw direction
- ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]);
- ComputeEdgeData(bottomLeft, bottomRight, rastEdges[4]);
- ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
- ComputeEdgeData(topRight, topLeft, rastEdges[6]);
-
- vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) +
- (rastEdges[3].b * (y - scissor.ymin)));
- vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) +
- (rastEdges[4].b * (y - scissor.ymax)));
- vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) +
- (rastEdges[5].b * (y - scissor.ymax)));
- vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) +
- (rastEdges[6].b * (y - scissor.ymin)));
-
- // Upper left rule for scissor: edge 3 (topLeft->bottomLeft) and edge 6 (topRight->topLeft) are lowered by 1
- vEdgeFix16[3] = _mm256_sub_pd(vEdgeFix16[3], _mm256_set1_pd(1.0));
- vEdgeFix16[6] = _mm256_sub_pd(vEdgeFix16[6], _mm256_set1_pd(1.0));
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Primary function template for TrivialRejectTest. Should
-/// never be called, but TemplateUnroller instantiates a few unused values,
-/// so it raises SWR_INVALID at run time instead of a static_assert, then returns false.
-template <typename ValidEdgeMaskT>
-INLINE bool TrivialRejectTest(const int, const int, const int)
-{
- SWR_INVALID("Primary templated function should never be called");
- return false;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief E0E1ValidT specialization of TrivialRejectTest: only edges 0 and 1 are tested.
-/// @return true (trivial reject) when mask0 or mask1 is zero, false otherwise
-template <>
-INLINE bool TrivialRejectTest<E0E1ValidT>(const int mask0, const int mask1, const int)
-{
-    return (mask0 == 0) || (mask1 == 0);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief E0E2ValidT specialization of TrivialRejectTest: only edges 0 and 2 are tested.
-/// @return true (trivial reject) when mask0 or mask2 is zero, false otherwise
-template <>
-INLINE bool TrivialRejectTest<E0E2ValidT>(const int mask0, const int, const int mask2)
-{
-    return (mask0 == 0) || (mask2 == 0);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief E1E2ValidT specialization of TrivialRejectTest: only edges 1 and 2 are tested.
-/// @return true (trivial reject) when mask1 or mask2 is zero, false otherwise
-template <>
-INLINE bool TrivialRejectTest<E1E2ValidT>(const int, const int mask1, const int mask2)
-{
-    return (mask1 == 0) || (mask2 == 0);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief AllEdgesValidT specialization of TrivialRejectTest: all three edges are tested.
-/// @return true (trivial reject) when any of the three masks is zero, false otherwise
-template <>
-INLINE bool TrivialRejectTest<AllEdgesValidT>(const int mask0, const int mask1, const int mask2)
-{
-    return (mask0 == 0) || (mask1 == 0) || (mask2 == 0);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief NoEdgesValidT specialization of TrivialRejectTest. Degenerate
-/// point, so always return false (never reject) and rasterize against conservative BBox
-template <>
-INLINE bool TrivialRejectTest<NoEdgesValidT>(const int, const int, const int)
-{
- return false;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Primary function template for TrivialAcceptTest. Always returns
-/// false; per the original note it is only reached for degenerate tris, which
-/// never cover the entire raster tile. A std::false_type specialization does the real test.
-template <typename ScissorEnableT>
-INLINE bool TrivialAcceptTest(const int, const int, const int)
-{
- return false;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief std::false_type specialization of TrivialAcceptTest (the primary template names this
-/// parameter ScissorEnableT). Accepts when all 4 bits are set in every one of the three edge masks
-template <>
-INLINE bool TrivialAcceptTest<std::false_type>(const int mask0, const int mask1, const int mask2)
-{
- return ((mask0 & mask1 & mask2) == 0xf);
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Primary template for GenerateSVInnerCoverage. The constructor body is empty,
-/// so the inner coverage mask argument is left untouched when SVInnerCoverage isn't requested
-template <typename RT, typename ValidEdgeMaskT, typename InputCoverageT>
-struct GenerateSVInnerCoverage
-{
- INLINE GenerateSVInnerCoverage(DRAW_CONTEXT*, uint32_t, EDGE*, double*, uint64_t&){};
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Specialization of GenerateSVInnerCoverage where all edges
-/// are non-degenerate and SVInnerCoverage is requested. Offsets the evaluated
-/// edge values by RT::InnerConservativeEdgeOffsetT and rasterizes into innerCoverageMask.
-template <typename RT>
-struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT>
-{
- INLINE GenerateSVInnerCoverage(DRAW_CONTEXT* pDC,
- uint32_t workerId, // not referenced in the body
- EDGE* pRastEdges,
- double* pStartQuadEdges,
- uint64_t& innerCoverageMask)
- {
- double startQuadEdgesAdj[RT::NumEdgesT::value];
- for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
- {
- startQuadEdgesAdj[e] = adjustScalarEdge<RT, typename RT::InnerConservativeEdgeOffsetT>(
- pRastEdges[e].a, pRastEdges[e].b, pStartQuadEdges[e]);
- }
-
- // not trivial accept or reject, must rasterize full tile (timed under the BERasterizePartial bucket)
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizePartial, pDC->drawId);
- innerCoverageMask = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(
- pDC, startQuadEdgesAdj, pRastEdges);
- RDTSC_END(pDC->pContext->pBucketMgr, BERasterizePartial, 0);
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Primary template for UpdateEdgeMasksInnerConservative. The constructor body is
-/// empty, so the three mask arguments are left untouched when SVInnerCoverage isn't requested
-template <typename RT, typename ValidEdgeMaskT, typename InputCoverageT>
-struct UpdateEdgeMasksInnerConservative
-{
- INLINE UpdateEdgeMasksInnerConservative(const __m256d (&vEdgeTileBbox)[3],
- const __m256d*,
- const __m128i,
- const __m128i,
- int32_t&,
- int32_t&,
- int32_t&){};
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Specialization of UpdateEdgeMasksInnerConservative where all edges
-/// are non-degenerate and SVInnerCoverage is requested. Offsets the edges
-/// evaluated at raster tile corners to inner conservative position and
-/// updates edge masks
-template <typename RT>
-struct UpdateEdgeMasksInnerConservative<RT, AllEdgesValidT, InnerConservativeCoverageT>
-{
- INLINE UpdateEdgeMasksInnerConservative(const __m256d (&vEdgeTileBbox)[3],
- const __m256d* vEdgeFix16,
- const __m128i vAi,
- const __m128i vBi,
- int32_t& mask0,
- int32_t& mask1,
- int32_t& mask2)
- {
- __m256d vTempEdge[3]{vEdgeFix16[0], vEdgeFix16[1], vEdgeFix16[2]};
-
- // instead of keeping 2 copies of evaluated edges around, just compensate for the outer
- // conservative evaluated edge when adjusting the edge in for inner conservative tests
- adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(
- vAi, vBi, vTempEdge[0]);
- adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(
- vAi, vBi, vTempEdge[1]);
- adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(
- vAi, vBi, vTempEdge[2]);
-
- UpdateEdgeMasks<typename RT::NumCoverageSamplesT>(
- vEdgeTileBbox, vTempEdge, mask0, mask1, mask2);
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Specialization of UpdateEdgeMasksInnerConservative where SVInnerCoverage
-/// is requested but at least one edge is degenerate. Since a degenerate triangle cannot
-/// cover an entire raster tile, set mask0 to 0 to force it down the
-/// rastierizePartialTile path
-template <typename RT, typename ValidEdgeMaskT>
-struct UpdateEdgeMasksInnerConservative<RT, ValidEdgeMaskT, InnerConservativeCoverageT>
-{
- INLINE UpdateEdgeMasksInnerConservative(const __m256d (&)[3],
- const __m256d*,
- const __m128i,
- const __m128i,
- int32_t& mask0,
- int32_t&,
- int32_t&)
- {
- // set one mask to zero to force the triangle down the rastierizePartialTile path
- mask0 = 0;
- }
-};
-
-template <typename RT>
-void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc)
-{
- const TRIANGLE_WORK_DESC& workDesc = *((TRIANGLE_WORK_DESC*)pDesc);
-#if KNOB_ENABLE_TOSS_POINTS
- if (KNOB_TOSS_BIN_TRIS)
- {
- return;
- }
-#endif
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizeTriangle, pDC->drawId);
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BETriangleSetup, pDC->drawId);
-
- const API_STATE& state = GetApiState(pDC);
- const SWR_RASTSTATE& rastState = state.rastState;
- const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
-
- OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc;
- triDesc.pUserClipBuffer = workDesc.pUserClipBuffer;
-
- __m128 vX, vY, vZ, vRecipW;
-
- // pTriBuffer data layout: grouped components of the 3 triangle points and 1 don't care
- // eg: vX = [x0 x1 x2 dc]
- vX = _mm_load_ps(workDesc.pTriBuffer);
- vY = _mm_load_ps(workDesc.pTriBuffer + 4);
- vZ = _mm_load_ps(workDesc.pTriBuffer + 8);
- vRecipW = _mm_load_ps(workDesc.pTriBuffer + 12);
-
- // convert to fixed point
- static_assert(std::is_same<typename RT::PrecisionT, FixedPointTraits<Fixed_16_8>>::value,
- "Rasterizer expects 16.8 fixed point precision");
- __m128i vXi = fpToFixedPoint(vX);
- __m128i vYi = fpToFixedPoint(vY);
-
- // quantize floating point position to fixed point precision
- // to prevent attribute creep around the triangle vertices
- vX = _mm_mul_ps(_mm_cvtepi32_ps(vXi), _mm_set1_ps(1.0f / FIXED_POINT_SCALE));
- vY = _mm_mul_ps(_mm_cvtepi32_ps(vYi), _mm_set1_ps(1.0f / FIXED_POINT_SCALE));
-
- // triangle setup - A and B edge equation coefs
- __m128 vA, vB;
- triangleSetupAB(vX, vY, vA, vB);
-
- __m128i vAi, vBi;
- triangleSetupABInt(vXi, vYi, vAi, vBi);
-
- // determinant
- float det = calcDeterminantInt(vAi, vBi);
-
- // Verts in Pixel Coordinate Space at this point
- // Det > 0 = CW winding order
- // Convert CW triangles to CCW
- if (det > 0.0)
- {
- vA = _mm_mul_ps(vA, _mm_set1_ps(-1));
- vB = _mm_mul_ps(vB, _mm_set1_ps(-1));
- vAi = _mm_mullo_epi32(vAi, _mm_set1_epi32(-1));
- vBi = _mm_mullo_epi32(vBi, _mm_set1_epi32(-1));
- det = -det;
- }
-
- __m128 vC;
- // Finish triangle setup - C edge coef
- triangleSetupC(vX, vY, vA, vB, vC);
-
- if (RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
- {
- // If we have degenerate edge(s) to rasterize, set I and J coefs
- // to 0 for constant interpolation of attributes
- triDesc.I[0] = 0.0f;
- triDesc.I[1] = 0.0f;
- triDesc.I[2] = 0.0f;
- triDesc.J[0] = 0.0f;
- triDesc.J[1] = 0.0f;
- triDesc.J[2] = 0.0f;
-
- // Degenerate triangles have no area
- triDesc.recipDet = 0.0f;
- }
- else
- {
- // only extract coefs for 2 of the barycentrics; the 3rd can be
- // determined from the barycentric equation:
- // i + j + k = 1 <=> k = 1 - j - i
- _MM_EXTRACT_FLOAT(triDesc.I[0], vA, 1);
- _MM_EXTRACT_FLOAT(triDesc.I[1], vB, 1);
- _MM_EXTRACT_FLOAT(triDesc.I[2], vC, 1);
- _MM_EXTRACT_FLOAT(triDesc.J[0], vA, 2);
- _MM_EXTRACT_FLOAT(triDesc.J[1], vB, 2);
- _MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2);
-
- // compute recipDet, used to calculate barycentric i and j in the backend
- triDesc.recipDet = 1.0f / det;
- }
-
- OSALIGNSIMD(float) oneOverW[4];
- _mm_store_ps(oneOverW, vRecipW);
- triDesc.OneOverW[0] = oneOverW[0] - oneOverW[2];
- triDesc.OneOverW[1] = oneOverW[1] - oneOverW[2];
- triDesc.OneOverW[2] = oneOverW[2];
-
- // calculate perspective correct coefs per vertex attrib
- float* pPerspAttribs = perspAttribsTLS;
- float* pAttribs = workDesc.pAttribs;
- triDesc.pPerspAttribs = pPerspAttribs;
- triDesc.pAttribs = pAttribs;
- float* pRecipW = workDesc.pTriBuffer + 12;
- triDesc.pRecipW = pRecipW;
- __m128 vOneOverWV0 = _mm_broadcast_ss(pRecipW);
- __m128 vOneOverWV1 = _mm_broadcast_ss(pRecipW += 1);
- __m128 vOneOverWV2 = _mm_broadcast_ss(pRecipW += 1);
- for (uint32_t i = 0; i < workDesc.numAttribs; i++)
- {
- __m128 attribA = _mm_load_ps(pAttribs);
- __m128 attribB = _mm_load_ps(pAttribs += 4);
- __m128 attribC = _mm_load_ps(pAttribs += 4);
- pAttribs += 4;
-
- attribA = _mm_mul_ps(attribA, vOneOverWV0);
- attribB = _mm_mul_ps(attribB, vOneOverWV1);
- attribC = _mm_mul_ps(attribC, vOneOverWV2);
-
- _mm_store_ps(pPerspAttribs, attribA);
- _mm_store_ps(pPerspAttribs += 4, attribB);
- _mm_store_ps(pPerspAttribs += 4, attribC);
- pPerspAttribs += 4;
- }
-
- // compute bary Z
- // zInterp = zVert0 + i(zVert1-zVert0) + j (zVert2 - zVert0)
- OSALIGNSIMD(float) a[4];
- _mm_store_ps(a, vZ);
- triDesc.Z[0] = a[0] - a[2];
- triDesc.Z[1] = a[1] - a[2];
- triDesc.Z[2] = a[2];
-
- // add depth bias
- triDesc.Z[2] += ComputeDepthBias(&rastState, &triDesc, workDesc.pTriBuffer + 8);
-
- // Calc bounding box of triangle
- OSALIGNSIMD(SWR_RECT) bbox;
- calcBoundingBoxInt(vXi, vYi, bbox);
-
- const SWR_RECT& scissorInFixedPoint =
- state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
-
- if (RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
- {
- // If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is
- // valid
- bbox.xmin--;
- bbox.xmax++;
- bbox.ymin--;
- bbox.ymax++;
- SWR_ASSERT(scissorInFixedPoint.xmin >= 0 && scissorInFixedPoint.ymin >= 0,
- "Conservative rast degenerate handling requires a valid scissor rect");
- }
-
- // Intersect with scissor/viewport
- OSALIGNSIMD(SWR_RECT) intersect;
- intersect.xmin = std::max(bbox.xmin, scissorInFixedPoint.xmin);
- intersect.xmax = std::min(bbox.xmax - 1, scissorInFixedPoint.xmax);
- intersect.ymin = std::max(bbox.ymin, scissorInFixedPoint.ymin);
- intersect.ymax = std::min(bbox.ymax - 1, scissorInFixedPoint.ymax);
-
- triDesc.triFlags = workDesc.triFlags;
-
- // further constrain backend to intersecting bounding box of macro tile and scissored triangle
- // bbox
- uint32_t macroX, macroY;
- MacroTileMgr::getTileIndices(macroTile, macroX, macroY);
- int32_t macroBoxLeft = macroX * KNOB_MACROTILE_X_DIM_FIXED;
- int32_t macroBoxRight = macroBoxLeft + KNOB_MACROTILE_X_DIM_FIXED - 1;
- int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
- int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
-
- intersect.xmin = std::max(intersect.xmin, macroBoxLeft);
- intersect.ymin = std::max(intersect.ymin, macroBoxTop);
- intersect.xmax = std::min(intersect.xmax, macroBoxRight);
- intersect.ymax = std::min(intersect.ymax, macroBoxBottom);
-
- SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax &&
- intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 &&
- intersect.ymax >= 0);
-
- RDTSC_END(pDC->pContext->pBucketMgr, BETriangleSetup, 0);
-
- // update triangle desc
- uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
- uint32_t minTileY = intersect.ymin >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
- uint32_t maxTileX = intersect.xmax >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
- uint32_t maxTileY = intersect.ymax >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
- uint32_t numTilesX = maxTileX - minTileX + 1;
- uint32_t numTilesY = maxTileY - minTileY + 1;
-
- if (numTilesX == 0 || numTilesY == 0)
- {
- RDTSC_EVENT(pDC->pContext->pBucketMgr, BEEmptyTriangle, 1, 0);
- RDTSC_END(pDC->pContext->pBucketMgr, BERasterizeTriangle, 1);
- return;
- }
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEStepSetup, pDC->drawId);
-
- // Step to pixel center of top-left pixel of the triangle bbox
- // Align intersect bbox (top/left) to raster tile's (top/left).
- int32_t x = AlignDown(intersect.xmin, (FIXED_POINT_SCALE * KNOB_TILE_X_DIM));
- int32_t y = AlignDown(intersect.ymin, (FIXED_POINT_SCALE * KNOB_TILE_Y_DIM));
-
- // convenience typedef
- typedef typename RT::NumCoverageSamplesT NumCoverageSamplesT;
-
- // single sample rasterization evaluates edges at pixel center,
- // multisample evaluates edges UL pixel corner and steps to each sample position
- if (std::is_same<NumCoverageSamplesT, SingleSampleT>::value)
- {
- // Add 0.5, in fixed point, to offset to pixel center
- x += (FIXED_POINT_SCALE / 2);
- y += (FIXED_POINT_SCALE / 2);
- }
-
- __m128i vTopLeftX = _mm_set1_epi32(x);
- __m128i vTopLeftY = _mm_set1_epi32(y);
-
- // evaluate edge equations at top-left pixel using 64bit math
- //
- // line = Ax + By + C
- // solving for C:
- // C = -Ax - By
- // we know x0 and y0 are on the line; plug them in:
- // C = -Ax0 - By0
- // plug C back into line equation:
- // line = Ax - By - Ax0 - By0
- // line = A(x - x0) + B(y - y0)
- // dX = (x-x0), dY = (y-y0)
- // so all this simplifies to
- // edge = A(dX) + B(dY), our first test at the top left of the bbox we're rasterizing within
-
- __m128i vDeltaX = _mm_sub_epi32(vTopLeftX, vXi);
- __m128i vDeltaY = _mm_sub_epi32(vTopLeftY, vYi);
-
- // evaluate A(dx) and B(dY) for all points
- __m256d vAipd = _mm256_cvtepi32_pd(vAi);
- __m256d vBipd = _mm256_cvtepi32_pd(vBi);
- __m256d vDeltaXpd = _mm256_cvtepi32_pd(vDeltaX);
- __m256d vDeltaYpd = _mm256_cvtepi32_pd(vDeltaY);
-
- __m256d vAiDeltaXFix16 = _mm256_mul_pd(vAipd, vDeltaXpd);
- __m256d vBiDeltaYFix16 = _mm256_mul_pd(vBipd, vDeltaYpd);
- __m256d vEdge = _mm256_add_pd(vAiDeltaXFix16, vBiDeltaYFix16);
-
- // apply any edge adjustments(top-left, crast, etc)
- adjustEdgesFix16<RT, typename RT::ConservativeEdgeOffsetT>(vAi, vBi, vEdge);
-
- // broadcast respective edge results to all lanes
- double* pEdge = (double*)&vEdge;
- __m256d vEdgeFix16[7];
- vEdgeFix16[0] = _mm256_set1_pd(pEdge[0]);
- vEdgeFix16[1] = _mm256_set1_pd(pEdge[1]);
- vEdgeFix16[2] = _mm256_set1_pd(pEdge[2]);
-
- OSALIGNSIMD(int32_t) aAi[4], aBi[4];
- _mm_store_si128((__m128i*)aAi, vAi);
- _mm_store_si128((__m128i*)aBi, vBi);
- EDGE rastEdges[RT::NumEdgesT::value];
-
- // Compute and store triangle edge data
- ComputeEdgeData(aAi[0], aBi[0], rastEdges[0]);
- ComputeEdgeData(aAi[1], aBi[1], rastEdges[1]);
- ComputeEdgeData(aAi[2], aBi[2], rastEdges[2]);
-
- // Compute and store triangle edge data if scissor needs to rasterized
- ComputeScissorEdges<typename RT::RasterizeScissorEdgesT, typename RT::IsConservativeT, RT>(
- bbox, scissorInFixedPoint, x, y, rastEdges, vEdgeFix16);
-
- // Evaluate edge equations at sample positions of each of the 4 corners of a raster tile
- // used to for testing if entire raster tile is inside a triangle
- for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
- {
- vEdgeFix16[e] = _mm256_add_pd(vEdgeFix16[e], rastEdges[e].vRasterTileOffsets);
- }
-
- // at this point vEdge has been evaluated at the UL pixel corners of raster tile bbox
- // step sample positions to the raster tile bbox of multisample points
- // min(xSamples),min(ySamples) ------ max(xSamples),min(ySamples)
- // | |
- // | |
- // min(xSamples),max(ySamples) ------ max(xSamples),max(ySamples)
- __m256d vEdgeTileBbox[3];
- if (NumCoverageSamplesT::value > 1)
- {
- const SWR_MULTISAMPLE_POS& samplePos = rastState.samplePositions;
- const __m128i vTileSampleBBoxXh = samplePos.TileSampleOffsetsX();
- const __m128i vTileSampleBBoxYh = samplePos.TileSampleOffsetsY();
-
- __m256d vTileSampleBBoxXFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxXh);
- __m256d vTileSampleBBoxYFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxYh);
-
- // step edge equation tests from Tile
- // used to for testing if entire raster tile is inside a triangle
- for (uint32_t e = 0; e < 3; ++e)
- {
- __m256d vResultAxFix16 =
- _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].a), vTileSampleBBoxXFix8);
- __m256d vResultByFix16 =
- _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].b), vTileSampleBBoxYFix8);
- vEdgeTileBbox[e] = _mm256_add_pd(vResultAxFix16, vResultByFix16);
-
- // adjust for msaa tile bbox edges outward for conservative rast, if enabled
- adjustEdgeConservative<RT, typename RT::ConservativeEdgeOffsetT>(
- vAi, vBi, vEdgeTileBbox[e]);
- }
- }
-
- RDTSC_END(pDC->pContext->pBucketMgr, BEStepSetup, 0);
-
- uint32_t tY = minTileY;
- uint32_t tX = minTileX;
- uint32_t maxY = maxTileY;
- uint32_t maxX = maxTileX;
-
- RenderOutputBuffers renderBuffers, currentRenderBufferRow;
- GetRenderHotTiles<RT::MT::numSamples>(pDC,
- workerId,
- macroTile,
- minTileX,
- minTileY,
- renderBuffers,
- triDesc.triFlags.renderTargetArrayIndex);
- currentRenderBufferRow = renderBuffers;
-
- // rasterize and generate coverage masks per sample
- for (uint32_t tileY = tY; tileY <= maxY; ++tileY)
- {
- __m256d vStartOfRowEdge[RT::NumEdgesT::value];
- for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
- {
- vStartOfRowEdge[e] = vEdgeFix16[e];
- }
-
- for (uint32_t tileX = tX; tileX <= maxX; ++tileX)
- {
- triDesc.anyCoveredSamples = 0;
-
- // is the corner of the edge outside of the raster tile? (vEdge < 0)
- int mask0, mask1, mask2;
- UpdateEdgeMasks<NumCoverageSamplesT>(vEdgeTileBbox, vEdgeFix16, mask0, mask1, mask2);
-
- for (uint32_t sampleNum = 0; sampleNum < NumCoverageSamplesT::value; sampleNum++)
- {
- // trivial reject, at least one edge has all 4 corners of raster tile outside
- bool trivialReject =
- TrivialRejectTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2);
-
- if (!trivialReject)
- {
- // trivial accept mask
- triDesc.coverageMask[sampleNum] = 0xffffffffffffffffULL;
-
- // Update the raster tile edge masks based on inner conservative edge offsets,
- // if enabled
- UpdateEdgeMasksInnerConservative<RT,
- typename RT::ValidEdgeMaskT,
- typename RT::InputCoverageT>(
- vEdgeTileBbox, vEdgeFix16, vAi, vBi, mask0, mask1, mask2);
-
- // @todo Make this a bit smarter to allow use of trivial accept when:
- // 1) scissor/vp intersection rect is raster tile aligned
- // 2) raster tile is entirely within scissor/vp intersection rect
- if (TrivialAcceptTest<typename RT::RasterizeScissorEdgesT>(mask0, mask1, mask2))
- {
- // trivial accept, all 4 corners of all 3 edges are negative
- // i.e. raster tile completely inside triangle
- triDesc.anyCoveredSamples = triDesc.coverageMask[sampleNum];
- if (std::is_same<typename RT::InputCoverageT,
- InnerConservativeCoverageT>::value)
- {
- triDesc.innerCoverageMask = 0xffffffffffffffffULL;
- }
- RDTSC_EVENT(pDC->pContext->pBucketMgr, BETrivialAccept, 1, 0);
- }
- else
- {
- __m256d vEdgeAtSample[RT::NumEdgesT::value];
- if (std::is_same<NumCoverageSamplesT, SingleSampleT>::value)
- {
- // should get optimized out for single sample case (global value
- // numbering or copy propagation)
- for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
- {
- vEdgeAtSample[e] = vEdgeFix16[e];
- }
- }
- else
- {
- const SWR_MULTISAMPLE_POS& samplePos = rastState.samplePositions;
- __m128i vSampleOffsetXh = samplePos.vXi(sampleNum);
- __m128i vSampleOffsetYh = samplePos.vYi(sampleNum);
- __m256d vSampleOffsetX = _mm256_cvtepi32_pd(vSampleOffsetXh);
- __m256d vSampleOffsetY = _mm256_cvtepi32_pd(vSampleOffsetYh);
-
- // step edge equation tests from UL tile corner to pixel sample position
- for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
- {
- __m256d vResultAxFix16 =
- _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].a), vSampleOffsetX);
- __m256d vResultByFix16 =
- _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].b), vSampleOffsetY);
- vEdgeAtSample[e] = _mm256_add_pd(vResultAxFix16, vResultByFix16);
- vEdgeAtSample[e] = _mm256_add_pd(vEdgeFix16[e], vEdgeAtSample[e]);
- }
- }
-
- double startQuadEdges[RT::NumEdgesT::value];
- const __m256i vLane0Mask = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
- for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
- {
- _mm256_maskstore_pd(&startQuadEdges[e], vLane0Mask, vEdgeAtSample[e]);
- }
-
- // not trivial accept or reject, must rasterize full tile
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizePartial, pDC->drawId);
- triDesc.coverageMask[sampleNum] =
- rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(
- pDC, startQuadEdges, rastEdges);
- RDTSC_END(pDC->pContext->pBucketMgr, BERasterizePartial, 0);
-
- triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum];
-
- // Output SV InnerCoverage, if needed
- GenerateSVInnerCoverage<RT,
- typename RT::ValidEdgeMaskT,
- typename RT::InputCoverageT>(
- pDC, workerId, rastEdges, startQuadEdges, triDesc.innerCoverageMask);
- }
- }
- else
- {
- // if we're calculating coverage per sample, need to store it off. otherwise no
- // covered samples, don't need to do anything
- if (NumCoverageSamplesT::value > 1)
- {
- triDesc.coverageMask[sampleNum] = 0;
- }
- RDTSC_EVENT(pDC->pContext->pBucketMgr, BETrivialReject, 1, 0);
- }
- }
-
-#if KNOB_ENABLE_TOSS_POINTS
- if (KNOB_TOSS_RS)
- {
- gToss = triDesc.coverageMask[0];
- }
- else
-#endif
- if (triDesc.anyCoveredSamples)
- {
- // if conservative rast and MSAA are enabled, conservative coverage for a pixel
- // means all samples in that pixel are covered copy conservative coverage result to
- // all samples
- if (RT::IsConservativeT::value)
- {
- auto copyCoverage = [&](int sample) {
- triDesc.coverageMask[sample] = triDesc.coverageMask[0];
- };
- UnrollerL<1, RT::MT::numSamples, 1>::step(copyCoverage);
- }
-
- // Track rasterized subspans
- AR_EVENT(RasterTileCount(pDC->drawId, 1));
-
- RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelBackend, pDC->drawId);
- backendFuncs.pfnBackend(pDC,
- workerId,
- tileX << KNOB_TILE_X_DIM_SHIFT,
- tileY << KNOB_TILE_Y_DIM_SHIFT,
- triDesc,
- renderBuffers);
- RDTSC_END(pDC->pContext->pBucketMgr, BEPixelBackend, 0);
- }
-
- // step to the next tile in X
- for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
- {
- vEdgeFix16[e] =
- _mm256_add_pd(vEdgeFix16[e], _mm256_set1_pd(rastEdges[e].stepRasterTileX));
- }
- StepRasterTileX<RT>(state.colorHottileEnable, renderBuffers);
- }
-
- // step to the next tile in Y
- for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
- {
- vEdgeFix16[e] =
- _mm256_add_pd(vStartOfRowEdge[e], _mm256_set1_pd(rastEdges[e].stepRasterTileY));
- }
- StepRasterTileY<RT>(state.colorHottileEnable, renderBuffers, currentRenderBufferRow);
- }
-
- RDTSC_END(pDC->pContext->pBucketMgr, BERasterizeTriangle, 1);
-}
-
-// Get pointers to hot tile memory for color RT, depth, stencil
-template <uint32_t numSamples>
-void GetRenderHotTiles(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t macroID,
- uint32_t tileX,
- uint32_t tileY,
- RenderOutputBuffers& renderBuffers,
- uint32_t renderTargetArrayIndex)
-{
- const API_STATE& state = GetApiState(pDC);
- SWR_CONTEXT* pContext = pDC->pContext;
- HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
-
- uint32_t mx, my;
- MacroTileMgr::getTileIndices(macroID, mx, my);
- tileX -= KNOB_MACROTILE_X_DIM_IN_TILES * mx;
- tileY -= KNOB_MACROTILE_Y_DIM_IN_TILES * my;
-
- // compute tile offset for active hottile buffers
- const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
- uint32_t offset = ComputeTileOffset2D<
- TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp>>(
- pitch, tileX, tileY);
- offset *= numSamples;
-
- unsigned long rtSlot = 0;
- uint32_t colorHottileEnableMask = state.colorHottileEnable;
- while (_BitScanForward(&rtSlot, colorHottileEnableMask))
- {
- HOTTILE* pColor = pContext->pHotTileMgr->GetHotTile(
- pContext,
- pDC,
- hWorkerPrivateData,
- macroID,
- (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot),
- true,
- numSamples,
- renderTargetArrayIndex);
- renderBuffers.pColor[rtSlot] = pColor->pBuffer + offset;
- renderBuffers.pColorHotTile[rtSlot] = pColor;
-
- colorHottileEnableMask &= ~(1 << rtSlot);
- }
- if (state.depthHottileEnable)
- {
- const uint32_t pitch =
- KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
- uint32_t offset = ComputeTileOffset2D<
- TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp>>(
- pitch, tileX, tileY);
- offset *= numSamples;
- HOTTILE* pDepth = pContext->pHotTileMgr->GetHotTile(pContext,
- pDC,
- hWorkerPrivateData,
- macroID,
- SWR_ATTACHMENT_DEPTH,
- true,
- numSamples,
- renderTargetArrayIndex);
- pDepth->state = HOTTILE_DIRTY;
- SWR_ASSERT(pDepth->pBuffer != nullptr);
- renderBuffers.pDepth = pDepth->pBuffer + offset;
- renderBuffers.pDepthHotTile = pDepth;
- }
- if (state.stencilHottileEnable)
- {
- const uint32_t pitch =
- KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
- uint32_t offset = ComputeTileOffset2D<
- TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp>>(
- pitch, tileX, tileY);
- offset *= numSamples;
- HOTTILE* pStencil = pContext->pHotTileMgr->GetHotTile(pContext,
- pDC,
- hWorkerPrivateData,
- macroID,
- SWR_ATTACHMENT_STENCIL,
- true,
- numSamples,
- renderTargetArrayIndex);
- pStencil->state = HOTTILE_DIRTY;
- SWR_ASSERT(pStencil->pBuffer != nullptr);
- renderBuffers.pStencil = pStencil->pBuffer + offset;
- renderBuffers.pStencilHotTile = pStencil;
- }
-}
-
-template <typename RT>
-INLINE void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers& buffers)
-{
- unsigned long rt = 0;
- while (_BitScanForward(&rt, colorHotTileMask))
- {
- colorHotTileMask &= ~(1 << rt);
- buffers.pColor[rt] += RT::colorRasterTileStep;
- }
-
- buffers.pDepth += RT::depthRasterTileStep;
- buffers.pStencil += RT::stencilRasterTileStep;
-}
-
-template <typename RT>
-INLINE void StepRasterTileY(uint32_t colorHotTileMask,
- RenderOutputBuffers& buffers,
- RenderOutputBuffers& startBufferRow)
-{
- unsigned long rt = 0;
- while (_BitScanForward(&rt, colorHotTileMask))
- {
- colorHotTileMask &= ~(1 << rt);
- startBufferRow.pColor[rt] += RT::colorRasterTileRowStep;
- buffers.pColor[rt] = startBufferRow.pColor[rt];
- }
- startBufferRow.pDepth += RT::depthRasterTileRowStep;
- buffers.pDepth = startBufferRow.pDepth;
-
- startBufferRow.pStencil += RT::stencilRasterTileRowStep;
- buffers.pStencil = startBufferRow.pStencil;
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp
deleted file mode 100644
index 6329b2ec98e..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-
-#include "rdtsc_core.h"
-#include "common/rdtsc_buckets.h"
-
-// must match CORE_BUCKETS enum order
-BUCKET_DESC gCoreBuckets[] = {
- {"APIClearRenderTarget", "", true, 0xff0b8bea},
- {"APIDraw", "", true, 0xff000066},
- {"APIDrawWakeAllThreads", "", false, 0xffffffff},
- {"APIDrawIndexed", "", true, 0xff000066},
- {"APIDispatch", "", true, 0xff660000},
- {"APIStoreTiles", "", true, 0xff00ffff},
- {"APIGetDrawContext", "", false, 0xffffffff},
- {"APISync", "", true, 0xff6666ff},
- {"APIWaitForIdle", "", true, 0xff0000ff},
- {"FEProcessDraw", "", true, 0xff009900},
- {"FEProcessDrawIndexed", "", true, 0xff009900},
- {"FEFetchShader", "", false, 0xffffffff},
- {"FEVertexShader", "", false, 0xffffffff},
- {"FEHullShader", "", false, 0xffffffff},
- {"FETessellation", "", false, 0xffffffff},
- {"FEDomainShader", "", false, 0xffffffff},
- {"FEGeometryShader", "", false, 0xffffffff},
- {"FEStreamout", "", false, 0xffffffff},
- {"FEPAAssemble", "", false, 0xffffffff},
- {"FEBinPoints", "", false, 0xff29b854},
- {"FEBinLines", "", false, 0xff29b854},
- {"FEBinTriangles", "", false, 0xff29b854},
- {"FETriangleSetup", "", false, 0xffffffff},
- {"FEViewportCull", "", false, 0xffffffff},
- {"FEGuardbandClip", "", false, 0xffffffff},
- {"FEClipPoints", "", false, 0xffffffff},
- {"FEClipLines", "", false, 0xffffffff},
- {"FEClipTriangles", "", false, 0xffffffff},
- {"FEClipRectangles", "", false, 0xffffffff},
- {"FECullZeroAreaAndBackface", "", false, 0xffffffff},
- {"FECullBetweenCenters", "", false, 0xffffffff},
- {"FEEarlyRastEnter", "", false, 0xffffffff},
- {"FEEarlyRastExit", "", false, 0xffffffff},
- {"FEProcessStoreTiles", "", true, 0xff39c864},
- {"FEProcessInvalidateTiles", "", true, 0xffffffff},
- {"WorkerWorkOnFifoBE", "", false, 0xff40261c},
- {"WorkerFoundWork", "", false, 0xff573326},
- {"BELoadTiles", "", true, 0xffb0e2ff},
- {"BEDispatch", "", true, 0xff00a2ff},
- {"BEClear", "", true, 0xff00ccbb},
- {"BERasterizeLine", "", true, 0xffb26a4e},
- {"BERasterizeTriangle", "", true, 0xffb26a4e},
- {"BETriangleSetup", "", false, 0xffffffff},
- {"BEStepSetup", "", false, 0xffffffff},
- {"BECullZeroArea", "", false, 0xffffffff},
- {"BEEmptyTriangle", "", false, 0xffffffff},
- {"BETrivialAccept", "", false, 0xffffffff},
- {"BETrivialReject", "", false, 0xffffffff},
- {"BERasterizePartial", "", false, 0xffffffff},
- {"BEPixelBackend", "", false, 0xffffffff},
- {"BESetup", "", false, 0xffffffff},
- {"BEBarycentric", "", false, 0xffffffff},
- {"BEEarlyDepthTest", "", false, 0xffffffff},
- {"BEPixelShader", "", false, 0xffffffff},
- {"BESingleSampleBackend", "", false, 0xffffffff},
- {"BEPixelRateBackend", "", false, 0xffffffff},
- {"BESampleRateBackend", "", false, 0xffffffff},
- {"BENullBackend", "", false, 0xffffffff},
- {"BELateDepthTest", "", false, 0xffffffff},
- {"BEOutputMerger", "", false, 0xffffffff},
- {"BEStoreTiles", "", true, 0xff00cccc},
- {"BEEndTile", "", false, 0xffffffff},
-};
-static_assert(NumBuckets == (sizeof(gCoreBuckets) / sizeof(gCoreBuckets[0])),
- "RDTSC Bucket enum and description table size mismatched.");
-
diff --git a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h
deleted file mode 100644
index 0228275bd47..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h
+++ /dev/null
@@ -1,185 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-
-#pragma once
-#include "knobs.h"
-
-#include "common/os.h"
-#include "common/rdtsc_buckets.h"
-
-#include <vector>
-
-///////////////////////////////////////////////////////////////////////////////
-// NOTE: This enum MUST be kept in sync with gCoreBuckets in rdtsc_core.cpp
-///////////////////////////////////////////////////////////////////////////////
-enum CORE_BUCKETS
-{
- APIClearRenderTarget,
- APIDraw,
- APIDrawWakeAllThreads,
- APIDrawIndexed,
- APIDispatch,
- APIStoreTiles,
- APIGetDrawContext,
- APISync,
- APIWaitForIdle,
- FEProcessDraw,
- FEProcessDrawIndexed,
- FEFetchShader,
- FEVertexShader,
- FEHullShader,
- FETessellation,
- FEDomainShader,
- FEGeometryShader,
- FEStreamout,
- FEPAAssemble,
- FEBinPoints,
- FEBinLines,
- FEBinTriangles,
- FETriangleSetup,
- FEViewportCull,
- FEGuardbandClip,
- FEClipPoints,
- FEClipLines,
- FEClipTriangles,
- FEClipRectangles,
- FECullZeroAreaAndBackface,
- FECullBetweenCenters,
- FEEarlyRastEnter,
- FEEarlyRastExit,
- FEProcessStoreTiles,
- FEProcessInvalidateTiles,
- WorkerWorkOnFifoBE,
- WorkerFoundWork,
- BELoadTiles,
- BEDispatch,
- BEClear,
- BERasterizeLine,
- BERasterizeTriangle,
- BETriangleSetup,
- BEStepSetup,
- BECullZeroArea,
- BEEmptyTriangle,
- BETrivialAccept,
- BETrivialReject,
- BERasterizePartial,
- BEPixelBackend,
- BESetup,
- BEBarycentric,
- BEEarlyDepthTest,
- BEPixelShader,
- BESingleSampleBackend,
- BEPixelRateBackend,
- BESampleRateBackend,
- BENullBackend,
- BELateDepthTest,
- BEOutputMerger,
- BEStoreTiles,
- BEEndTile,
-
- NumBuckets
-};
-
-void rdtscReset(BucketManager* pBucketMgr);
-void rdtscInit(BucketManager* pBucketMgr, int threadId);
-void rdtscStart(BucketManager* pBucketMgr, uint32_t bucketId);
-void rdtscStop(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count, uint64_t drawId);
-void rdtscEvent(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count1, uint32_t count2);
-void rdtscEndFrame(BucketManager* pBucketMgr);
-
-#ifdef KNOB_ENABLE_RDTSC
-#define RDTSC_RESET(pBucketMgr) rdtscReset(pBucketMgr)
-#define RDTSC_INIT(pBucketMgr, threadId) rdtscInit(pBucketMgr,threadId)
-#define RDTSC_START(pBucketMgr, bucket) rdtscStart(pBucketMgr, bucket)
-#define RDTSC_STOP(pBucketMgr, bucket, count, draw) rdtscStop(pBucketMgr, bucket, count, draw)
-#define RDTSC_EVENT(pBucketMgr, bucket, count1, count2) rdtscEvent(pBucketMgr, bucket, count1, count2)
-#define RDTSC_ENDFRAME(pBucketMgr) rdtscEndFrame(pBucketMgr)
-#else
-#define RDTSC_RESET(pBucketMgr)
-#define RDTSC_INIT(pBucketMgr, threadId)
-#define RDTSC_START(pBucketMgr, bucket)
-#define RDTSC_STOP(pBucketMgr, bucket, count, draw)
-#define RDTSC_EVENT(pBucketMgr, bucket, count1, count2)
-#define RDTSC_ENDFRAME(pBucketMgr)
-#endif
-
-extern BUCKET_DESC gCoreBuckets[];
-
-INLINE void rdtscReset(BucketManager *pBucketMgr)
-{
- pBucketMgr->mCurrentFrame = 0;
- pBucketMgr->ClearThreads();
-}
-
-INLINE void rdtscInit(BucketManager* pBucketMgr, int threadId)
-{
- // register all the buckets once
- if (!pBucketMgr->mBucketsInitialized && (threadId == 0))
- {
- pBucketMgr->mBucketMap.resize(NumBuckets);
- for (uint32_t i = 0; i < NumBuckets; ++i)
- {
- pBucketMgr->mBucketMap[i] = pBucketMgr->RegisterBucket(gCoreBuckets[i]);
- }
- pBucketMgr->mBucketsInitialized = true;
- }
-
- std::string name = threadId == 0 ? "API" : "WORKER";
- pBucketMgr->RegisterThread(name);
-}
-
-INLINE void rdtscStart(BucketManager* pBucketMgr, uint32_t bucketId)
-{
- uint32_t id = pBucketMgr->mBucketMap[bucketId];
- pBucketMgr->StartBucket(id);
-}
-
-INLINE void rdtscStop(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count, uint64_t drawId)
-{
- uint32_t id = pBucketMgr->mBucketMap[bucketId];
- pBucketMgr->StopBucket(id);
-}
-
-INLINE void rdtscEvent(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count1, uint32_t count2)
-{
- uint32_t id = pBucketMgr->mBucketMap[bucketId];
- pBucketMgr->AddEvent(id, count1);
-}
-
-INLINE void rdtscEndFrame(BucketManager* pBucketMgr)
-{
- pBucketMgr->mCurrentFrame++;
-
- if (pBucketMgr->mCurrentFrame == KNOB_BUCKETS_START_FRAME &&
- KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
- {
- pBucketMgr->StartCapture();
- }
-
- if (pBucketMgr->mCurrentFrame == KNOB_BUCKETS_END_FRAME &&
- KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
- {
- pBucketMgr->StopCapture();
- pBucketMgr->PrintReport("rdtsc.txt");
- }
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h b/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h
deleted file mode 100644
index 2e758f43753..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file ringbuffer.h
- *
- * @brief RingBuffer
- * The RingBuffer class manages all aspects of the ring buffer including
- * the head/tail indices, etc.
- *
- ******************************************************************************/
-#pragma once
-
-/// @brief Fixed-capacity ring of T entries tracked by two free-running 32-bit
-///        counters. Enqueue() advances the head (single producer) and Dequeue()
-///        atomically advances the tail (multiple consumers). The counters are
-///        never reduced modulo the capacity here; operator[] expects an index
-///        that is already < the capacity.
-template <typename T>
-class RingBuffer
-{
-public:
-    RingBuffer() : mpRingBuffer(nullptr), mNumEntries(0), mRingHead(0), mRingTail(0) {}
-
-    ~RingBuffer() { Destroy(); }
-
-    /// @brief Allocates 64-byte-aligned, zero-filled storage for numEntries entries.
-    /// @param numEntries - must be > 0 and must divide 2^32 evenly, so that the
-    ///        32-bit counters wrapping around does not cause wrap errors.
-    void Init(uint32_t numEntries)
-    {
-        SWR_ASSERT(numEntries > 0);
-        // numEntries is unsigned, so it is printed with %u rather than %d.
-        SWR_ASSERT(((1ULL << 32) % numEntries) == 0,
-                   "%u is not evenly divisible into 2 ^ 32. Wrap errors will occur!",
-                   numEntries);
-        mNumEntries  = numEntries;
-        mpRingBuffer = (T*)AlignedMalloc(sizeof(T) * numEntries, 64);
-        SWR_ASSERT(mpRingBuffer != nullptr);
-        memset((void*)mpRingBuffer, 0, sizeof(T) * numEntries);
-    }
-
-    /// @brief Releases the entry storage. Safe to call on a never-initialized
-    ///        ring (the destructor does exactly that).
-    void Destroy()
-    {
-        AlignedFree(mpRingBuffer);
-        mpRingBuffer = nullptr;
-        // Drop the capacity as well so operator[]'s bounds assert fires on any
-        // access after the storage has been released.
-        mNumEntries = 0;
-    }
-
-    /// @brief Direct access to an entry slot.
-    /// @param index - slot index; asserted to be < the capacity passed to Init().
-    T& operator[](const uint32_t index)
-    {
-        SWR_ASSERT(index < mNumEntries);
-        return mpRingBuffer[index];
-    }
-
-    /// @brief Publishes one entry by advancing the head counter (not atomic).
-    INLINE void Enqueue()
-    {
-        mRingHead++; // There's only one producer.
-        // Assert to find wrap-around cases, NEVER ENABLE DURING CHECKIN!!
-        // SWR_REL_ASSERT(mRingHead);
-    }
-
-    /// @brief Retires one entry by atomically advancing the tail counter.
-    INLINE void Dequeue()
-    {
-        InterlockedIncrement(&mRingTail); // There are multiple consumers.
-    }
-
-    /// @brief True when head and tail counters are equal.
-    INLINE bool IsEmpty() { return (GetHead() == GetTail()); }
-
-    /// @brief True when the number of outstanding entries equals the capacity.
-    INLINE bool IsFull()
-    {
-        // Unsigned subtraction stays correct across counter wrap-around.
-        uint32_t numEnqueued = GetHead() - GetTail();
-        SWR_ASSERT(numEnqueued <= mNumEntries);
-
-        return (numEnqueued == mNumEntries);
-    }
-
-    INLINE uint32_t GetTail() volatile { return mRingTail; }
-    INLINE uint32_t GetHead() volatile { return mRingHead; }
-
-protected:
-    T*       mpRingBuffer;
-    uint32_t mNumEntries;
-
-    OSALIGNLINE(volatile uint32_t) mRingHead; // Producer counter (advanced by Enqueue)
-    OSALIGNLINE(volatile uint32_t) mRingTail; // Consumer counter (advanced by Dequeue)
-};
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
deleted file mode 100644
index 66a23bd9b08..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ /dev/null
@@ -1,1240 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file state.h
- *
- * @brief Definitions for API state.
- *
- ******************************************************************************/
-// Skipping clang-format due to parsing by simplistic python scripts
-// clang-format off
-#pragma once
-
-#include "common/formats.h"
-#include "common/intrin.h"
-#include "common/rdtsc_buckets.h"
-#include <functional>
-#include <algorithm>
-
-// Plain integer alias used by the structs below for buffer-address fields
-// (e.g. xpData, xpIndices, xpLastIndex, pBuffer).
-using gfxptr_t = unsigned long long;
-
-//////////////////////////////////////////////////////////////////////////
-/// PRIMITIVE_TOPOLOGY.
-/// @brief Primitive topology codes with explicit values. Between 0x00 and
-///        0x17 the value 0x15 is not assigned. For patch lists,
-///        TOP_PATCHLIST_n == TOP_PATCHLIST_BASE + n for n = 1..32.
-//////////////////////////////////////////////////////////////////////////
-enum PRIMITIVE_TOPOLOGY
-{
- TOP_UNKNOWN = 0x0,
- TOP_POINT_LIST = 0x1,
- TOP_LINE_LIST = 0x2,
- TOP_LINE_STRIP = 0x3,
- TOP_TRIANGLE_LIST = 0x4,
- TOP_TRIANGLE_STRIP = 0x5,
- TOP_TRIANGLE_FAN = 0x6,
- TOP_QUAD_LIST = 0x7,
- TOP_QUAD_STRIP = 0x8,
- TOP_LINE_LIST_ADJ = 0x9,
- TOP_LISTSTRIP_ADJ = 0xA,
- TOP_TRI_LIST_ADJ = 0xB,
- TOP_TRI_STRIP_ADJ = 0xC,
- TOP_TRI_STRIP_REVERSE = 0xD,
- TOP_POLYGON = 0xE,
- TOP_RECT_LIST = 0xF,
- TOP_LINE_LOOP = 0x10,
- TOP_POINT_LIST_BF = 0x11,
- TOP_LINE_STRIP_CONT = 0x12,
- TOP_LINE_STRIP_BF = 0x13,
- TOP_LINE_STRIP_CONT_BF = 0x14,
- TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16,
- TOP_TRIANGLE_DISC = 0x17, /// @todo What is this??
-
- TOP_PATCHLIST_BASE = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
- TOP_PATCHLIST_1 = 0x20, // List of 1-vertex patches
- TOP_PATCHLIST_2 = 0x21,
- TOP_PATCHLIST_3 = 0x22,
- TOP_PATCHLIST_4 = 0x23,
- TOP_PATCHLIST_5 = 0x24,
- TOP_PATCHLIST_6 = 0x25,
- TOP_PATCHLIST_7 = 0x26,
- TOP_PATCHLIST_8 = 0x27,
- TOP_PATCHLIST_9 = 0x28,
- TOP_PATCHLIST_10 = 0x29,
- TOP_PATCHLIST_11 = 0x2A,
- TOP_PATCHLIST_12 = 0x2B,
- TOP_PATCHLIST_13 = 0x2C,
- TOP_PATCHLIST_14 = 0x2D,
- TOP_PATCHLIST_15 = 0x2E,
- TOP_PATCHLIST_16 = 0x2F,
- TOP_PATCHLIST_17 = 0x30,
- TOP_PATCHLIST_18 = 0x31,
- TOP_PATCHLIST_19 = 0x32,
- TOP_PATCHLIST_20 = 0x33,
- TOP_PATCHLIST_21 = 0x34,
- TOP_PATCHLIST_22 = 0x35,
- TOP_PATCHLIST_23 = 0x36,
- TOP_PATCHLIST_24 = 0x37,
- TOP_PATCHLIST_25 = 0x38,
- TOP_PATCHLIST_26 = 0x39,
- TOP_PATCHLIST_27 = 0x3A,
- TOP_PATCHLIST_28 = 0x3B,
- TOP_PATCHLIST_29 = 0x3C,
- TOP_PATCHLIST_30 = 0x3D,
- TOP_PATCHLIST_31 = 0x3E,
- TOP_PATCHLIST_32 = 0x3F, // List of 32-vertex patches
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_SHADER_TYPE
-/// @brief Shader stage identifiers, implicitly numbered from 0.
-///        NUM_SHADER_TYPES is last, so it equals the number of stages (6).
-//////////////////////////////////////////////////////////////////////////
-enum SWR_SHADER_TYPE
-{
- SHADER_VERTEX,
- SHADER_GEOMETRY,
- SHADER_DOMAIN,
- SHADER_HULL,
- SHADER_PIXEL,
- SHADER_COMPUTE,
-
- NUM_SHADER_TYPES,
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_RENDERTARGET_ATTACHMENT
-/// @brief Attachment indices, implicitly numbered from 0: eight color
-///        attachments (0..7), then depth (8) and stencil (9).
-///        SWR_NUM_ATTACHMENTS is last and therefore equals 10.
-/// @todo It's not clear what an "attachment" means. It's not a common term.
-//////////////////////////////////////////////////////////////////////////
-enum SWR_RENDERTARGET_ATTACHMENT
-{
- SWR_ATTACHMENT_COLOR0,
- SWR_ATTACHMENT_COLOR1,
- SWR_ATTACHMENT_COLOR2,
- SWR_ATTACHMENT_COLOR3,
- SWR_ATTACHMENT_COLOR4,
- SWR_ATTACHMENT_COLOR5,
- SWR_ATTACHMENT_COLOR6,
- SWR_ATTACHMENT_COLOR7,
- SWR_ATTACHMENT_DEPTH,
- SWR_ATTACHMENT_STENCIL,
-
- SWR_NUM_ATTACHMENTS
-};
-
-// Number of color render targets (matches the eight SWR_ATTACHMENT_COLORn entries).
-#define SWR_NUM_RENDERTARGETS 8
-
-// One bit per attachment: bit i corresponds to SWR_RENDERTARGET_ATTACHMENT value i
-// (COLOR0 = bit 0 ... COLOR7 = bit 7, DEPTH = bit 8, STENCIL = bit 9).
-// MASK_ALL covers all ten bits; MASK_COLOR covers only the eight color bits.
-#define SWR_ATTACHMENT_COLOR0_BIT 0x001
-#define SWR_ATTACHMENT_COLOR1_BIT 0x002
-#define SWR_ATTACHMENT_COLOR2_BIT 0x004
-#define SWR_ATTACHMENT_COLOR3_BIT 0x008
-#define SWR_ATTACHMENT_COLOR4_BIT 0x010
-#define SWR_ATTACHMENT_COLOR5_BIT 0x020
-#define SWR_ATTACHMENT_COLOR6_BIT 0x040
-#define SWR_ATTACHMENT_COLOR7_BIT 0x080
-#define SWR_ATTACHMENT_DEPTH_BIT 0x100
-#define SWR_ATTACHMENT_STENCIL_BIT 0x200
-#define SWR_ATTACHMENT_MASK_ALL 0x3ff
-#define SWR_ATTACHMENT_MASK_COLOR 0x0ff
-
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SWR Inner Tessellation factor ID
-/// See above GetTessFactorOutputPosition code for documentation
-/// SWR_NUM_INNER_TESS_FACTORS (== 2) sizes
-/// SWR_TESSELLATION_FACTORS::InnerTessFactors.
-enum SWR_INNER_TESSFACTOR_ID
-{
- SWR_QUAD_U_TRI_INSIDE,
- SWR_QUAD_V_INSIDE,
-
- SWR_NUM_INNER_TESS_FACTORS,
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief SWR Outer Tessellation factor ID
-/// See above GetTessFactorOutputPosition code for documentation
-/// SWR_NUM_OUTER_TESS_FACTORS (== 4) sizes
-/// SWR_TESSELLATION_FACTORS::OuterTessFactors.
-enum SWR_OUTER_TESSFACTOR_ID
-{
- SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL,
- SWR_QUAD_U_EQ1_TRI_V_LINE_DENSITY,
- SWR_QUAD_V_EQ0_TRI_W,
- SWR_QUAD_V_EQ1,
-
- SWR_NUM_OUTER_TESS_FACTORS,
-};
-
-/////////////////////////////////////////////////////////////////////////
-/// simdvertex / SWR_VTX_SLOTS
-/// @brief Defines a vertex element that holds all the data for SIMD vertices.
-/// Contains space for position, SGV, and 32 generic attributes
-/// With VERTEX_POSITION_END_SLOT == 1 the derived values are:
-/// clip/cull lo = 2, clip/cull hi = 3, attributes = 4..35 (32 slots),
-/// SWR_VTX_NUM_SLOTS = 36.
-/// The VERTEX_SGV_*_COMP values (0, 1, 2) overlap the slot numbers;
-/// presumably they are component indices within the SGV slot - TODO confirm.
-/////////////////////////////////////////////////////////////////////////
-enum SWR_VTX_SLOTS
-{
- VERTEX_SGV_SLOT = 0,
- VERTEX_SGV_RTAI_COMP = 0,
- VERTEX_SGV_VAI_COMP = 1,
- VERTEX_SGV_POINT_SIZE_COMP = 2,
- VERTEX_POSITION_SLOT = 1,
- VERTEX_POSITION_END_SLOT = 1,
- VERTEX_CLIPCULL_DIST_LO_SLOT = (1 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist
- VERTEX_CLIPCULL_DIST_HI_SLOT = (2 + VERTEX_POSITION_END_SLOT), // VS writes upper 4 clip/cull dist
- VERTEX_ATTRIB_START_SLOT = (3 + VERTEX_POSITION_END_SLOT),
- VERTEX_ATTRIB_END_SLOT = (34 + VERTEX_POSITION_END_SLOT),
- SWR_VTX_NUM_SLOTS = (1 + VERTEX_ATTRIB_END_SLOT)
-};
-
-// SoAoSoA
-// One simdvector per vertex slot; the array is indexed by SWR_VTX_SLOTS values.
-struct simdvertex
-{
- simdvector attrib[SWR_VTX_NUM_SLOTS];
-};
-
-// Same slot layout as simdvertex, but each slot holds a simd16vector.
-struct simd16vertex
-{
- simd16vector attrib[SWR_VTX_NUM_SLOTS];
-};
-
-// Width-generic vertex: same slot layout, with the per-slot vector type taken
-// from SIMD_T::Vec4.
-template <typename SIMD_T>
-struct SIMDVERTEX_T
-{
- typename SIMD_T::Vec4 attrib[SWR_VTX_NUM_SLOTS];
-};
-
-// Worker data; currently holds only the archrast context handle.
-struct SWR_WORKER_DATA
-{
- HANDLE hArContext; // handle to the archrast context
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_SHADER_STATS
-/// @brief Structure passed to shader for stats collection.
-///        Embedded as the `stats` member of each SWR_*_CONTEXT shader
-///        context declared below. All counters are 32-bit.
-/////////////////////////////////////////////////////////////////////////
-struct SWR_SHADER_STATS
-{
- uint32_t numInstExecuted; // This is roughly the API instructions executed and not x86.
- uint32_t numSampleExecuted;
- uint32_t numSampleLExecuted;
- uint32_t numSampleBExecuted;
- uint32_t numSampleCExecuted;
- uint32_t numSampleCLZExecuted;
- uint32_t numSampleCDExecuted;
- uint32_t numGather4Executed;
- uint32_t numGather4CExecuted;
- uint32_t numGather4CPOExecuted;
- uint32_t numGather4CPOCExecuted;
- uint32_t numLodExecuted;
-};
-
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_VS_CONTEXT
-/// @brief Input to vertex shader
-/// @note Despite the name, the context also carries the shader's outputs
-///       (pVout, stats); each field is tagged IN or OUT below.
-/////////////////////////////////////////////////////////////////////////
-struct SWR_VS_CONTEXT
-{
- simdvertex* pVin; // IN: SIMD input vertex data store
- simdvertex* pVout; // OUT: SIMD output vertex data store
-
- uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD
- simdscalari VertexID; // IN: Vertex ID
- simdscalari mask; // IN: Active mask for shader
-
- // SIMD16 Frontend fields.
- uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in
- // simd16vertex output
- simd16scalari mask16; // IN: Active mask for shader (16-wide)
- simd16scalari VertexID16; // IN: Vertex ID (16-wide)
-
- SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
-};
-
-/////////////////////////////////////////////////////////////////////////
-/// ScalarAttrib
-/// @brief a single four-component (x, y, z, w) float attribute
-/////////////////////////////////////////////////////////////////////////
-struct ScalarAttrib
-{
- float x;
- float y;
- float z;
- float w;
-};
-
-/////////////////////////////////////////////////////////////////////////
-/// ScalarCPoint
-/// @brief defines a control point element as passed from the output
-/// of the hull shader to the input of the domain shader; one
-/// ScalarAttrib per vertex slot (SWR_VTX_NUM_SLOTS entries)
-/////////////////////////////////////////////////////////////////////////
-struct ScalarCPoint
-{
- ScalarAttrib attrib[SWR_VTX_NUM_SLOTS];
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_TESSELLATION_FACTORS
-/// @brief Tessellation factors structure (non-vector)
-///        4 outer + 2 inner factors plus 2 floats of padding = 8 floats;
-///        the static assert below pins the total size at 32 bytes.
-/////////////////////////////////////////////////////////////////////////
-struct SWR_TESSELLATION_FACTORS
-{
- float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
- float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
- float pad[2];
-};
-
-SWR_STATIC_ASSERT(sizeof(SWR_TESSELLATION_FACTORS) == 32);
-
-#define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
-// A patch in scalar (non-SIMD) form: its tessellation factors, up to
-// MAX_NUM_VERTS_PER_PRIM control points, and one extra ScalarCPoint of patch data.
-struct ScalarPatch
-{
- SWR_TESSELLATION_FACTORS tessFactors;
- ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
- ScalarCPoint patchData;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_HS_CONTEXT
-/// @brief Input to hull shader
-/// @note Also carries the shader's outputs (pCPout, stats). The vert array
-///       is embedded by value and holds MAX_NUM_VERTS_PER_PRIM simdvertex
-///       entries.
-/////////////////////////////////////////////////////////////////////////
-struct SWR_HS_CONTEXT
-{
- simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
- simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
- simdscalari mask; // IN: Active mask for shader
- uint32_t outputSize; // IN: Size of HS output (per lane)
- ScalarPatch* pCPout; // OUT: Output control point patch SIMD-sized-array of SCALAR patches
- SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_DS_CONTEXT
-/// @brief Input to domain shader
-/// @note Also carries the shader's outputs (pOutputData, stats). Fields are
-///       tagged (SCALAR) or (SIMD) to show whether they hold one value per
-///       invocation or a vector of values.
-/////////////////////////////////////////////////////////////////////////
-struct SWR_DS_CONTEXT
-{
- uint32_t PrimitiveID; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
- uint32_t vectorOffset; // IN: (SCALAR) vector index offset into SIMD data.
- uint32_t vectorStride; // IN: (SCALAR) stride (in vectors) of output data per attribute-component
- uint32_t outVertexAttribOffset; // IN: (SCALAR) Offset to the attributes as processed by the next shader stage.
- ScalarPatch* pCpIn; // IN: (SCALAR) Control patch
- simdscalar* pDomainU; // IN: (SIMD) Domain Point U coords
- simdscalar* pDomainV; // IN: (SIMD) Domain Point V coords
- simdscalari mask; // IN: Active mask for shader
- simdscalar* pOutputData; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
- SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_GS_CONTEXT
-/// @brief Input to geometry shader.
-/// @note Also carries the shader's outputs: one output stream pointer per
-///       SIMD lane (pStreams has KNOB_SIMD_WIDTH entries) and stats.
-/////////////////////////////////////////////////////////////////////////
-struct SWR_GS_CONTEXT
-{
- simdvector* pVerts; // IN: input primitive data for SIMD prims
- uint32_t inputVertStride; // IN: input vertex stride, in attributes
- simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call
- uint32_t InstanceID; // IN: input instance ID
- simdscalari mask; // IN: Active mask for shader
- uint8_t* pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains vertices for all output streams)
- SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
-};
-
-// One SIMD value per evaluation position. Used by SWR_PS_CONTEXT for pixel
-// x/y locations, barycentrics (vI, vJ) and 1/w.
-// NOTE(review): UL presumably means the pixel's upper-left corner - confirm.
-struct PixelPositions
-{
- simdscalar UL;
- simdscalar center;
- simdscalar sample;
- simdscalar centroid;
-};
-
-// Upper bound on samples per pixel; matches the largest SWR_MULTISAMPLE_COUNT entry (16X).
-#define SWR_MAX_NUM_MULTISAMPLES 16
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_PS_CONTEXT
-/// @brief Input to pixel shader.
-/// @note Also carries the shader's outputs (activeMask, oMask, shaded,
-///       stats); vZ is both read and written.
-/////////////////////////////////////////////////////////////////////////
-struct SWR_PS_CONTEXT
-{
- PixelPositions vX; // IN: x location(s) of pixels
- PixelPositions vY; // IN: y location(s) of pixels
- simdscalar vZ; // INOUT: z location of pixels
- simdscalari activeMask; // OUT: mask for kill
- simdscalar inputMask; // IN: input coverage mask for all samples
- simdscalari oMask; // OUT: mask for output coverage
-
- PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
- PixelPositions vJ;
- PixelPositions vOneOverW; // IN: 1/w
-
- const float* pAttribs; // IN: pointer to attribute barycentric coefficients
- const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
- const float* pRecipW; // IN: pointer to 1/w coord for each vertex
- const float* I; // IN: Barycentric A, B, and C coefs used to compute I
- const float* J; // IN: Barycentric A, B, and C coefs used to compute J
- float recipDet; // IN: 1/Det, used when barycentric interpolating attributes
- const float* pSamplePosX; // IN: array of sample positions
- const float* pSamplePosY; // IN: array of sample positions
- simdvector shaded[SWR_NUM_RENDERTARGETS]; // OUT: result color per rendertarget
-
- uint32_t frontFace; // IN: front- 1, back- 0
- uint32_t sampleIndex; // IN: sampleIndex
- uint32_t renderTargetArrayIndex; // IN: render target array index from GS
- uint32_t viewportIndex; // IN: viewport index from GS
- uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
-
- uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
-
- SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
-
- BucketManager *pBucketManager; // @llvm_struct - IN: performance buckets.
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_CS_CONTEXT
-/// @brief Input to compute shader.
-/// @note Also carries the shader's output statistics (stats).
-/////////////////////////////////////////////////////////////////////////
-struct SWR_CS_CONTEXT
-{
- // The ThreadGroupId is the current thread group index relative
- // to all thread groups in the Dispatch call. The ThreadId, ThreadIdInGroup,
- // and ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader.
-
- // Compute shader accepts the following system values.
- // o ThreadId - Current thread id relative to all other threads in dispatch.
- // o ThreadGroupId - Current thread group id relative to all other groups in dispatch.
- // o ThreadIdInGroup - Current thread relative to all threads in the current thread group.
- // o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
- //
- // All of these system values can be computed in the shader. They will be
- // derived from the current tile counter. The tile counter is an atomic counter that
- // resides in the draw context and is initialized to the product of the dispatch dims.
- //
- // tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
- //
- // Each CPU worker thread atomically decrements this counter and passes the current
- // count into the shader. When the count reaches 0, all thread groups in the
- // dispatch call have been completed.
-
- uint32_t tileCounter; // The tile counter value for this thread group.
-
- // Dispatch dimensions used by shader to compute system values from the tile counter.
- uint32_t dispatchDims[3];
-
- uint8_t* pTGSM; // Thread Group Shared Memory pointer.
- uint8_t* pSpillFillBuffer; // Spill/fill buffer for barrier support
- uint8_t* pScratchSpace; // Pointer to scratch space buffer used by the shader, shader is
- // responsible for subdividing scratch space per instance/simd
- uint32_t scratchSpacePerWarp; // Scratch space per work item x SIMD_WIDTH
-
- SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
-};
-
-// enums
-// Surface tiling modes. Only the first value is explicit (0); the rest follow
-// in declaration order, so SWR_TILE_MODE_COUNT equals the number of modes (5).
-enum SWR_TILE_MODE
-{
- SWR_TILE_NONE = 0x0, // Linear mode (no tiling)
- SWR_TILE_MODE_WMAJOR, // W major tiling
- SWR_TILE_MODE_XMAJOR, // X major tiling
- SWR_TILE_MODE_YMAJOR, // Y major tiling
- SWR_TILE_SWRZ, // SWR-Z tiling
-
-
- SWR_TILE_MODE_COUNT
-};
-
-// Surface kinds with explicit values. The value 6 is not assigned:
-// SURFACE_NULL is 7.
-enum SWR_SURFACE_TYPE
-{
- SURFACE_1D = 0,
- SURFACE_2D = 1,
- SURFACE_3D = 2,
- SURFACE_CUBE = 3,
- SURFACE_BUFFER = 4,
- SURFACE_STRUCTURED_BUFFER = 5,
- SURFACE_NULL = 7
-};
-
-// Comparison function selectors, implicitly numbered from 0 in declaration
-// order. NUM_ZFUNC is last and therefore equals the number of functions (8).
-enum SWR_ZFUNCTION
-{
- ZFUNC_ALWAYS,
- ZFUNC_NEVER,
- ZFUNC_LT,
- ZFUNC_EQ,
- ZFUNC_LE,
- ZFUNC_GT,
- ZFUNC_NE,
- ZFUNC_GE,
- NUM_ZFUNC
-};
-
-// Stencil operation selectors, implicitly numbered 0..7 in declaration order.
-// There is no trailing count entry.
-enum SWR_STENCILOP
-{
- STENCILOP_KEEP,
- STENCILOP_ZERO,
- STENCILOP_REPLACE,
- STENCILOP_INCRSAT,
- STENCILOP_DECRSAT,
- STENCILOP_INCR,
- STENCILOP_DECR,
- STENCILOP_INVERT
-};
-
-// Blend factor selectors, implicitly numbered 0..18 in declaration order
-// (BLENDFACTOR_ONE == 0, BLENDFACTOR_ZERO == 10). There is no count entry.
-enum SWR_BLEND_FACTOR
-{
- BLENDFACTOR_ONE,
- BLENDFACTOR_SRC_COLOR,
- BLENDFACTOR_SRC_ALPHA,
- BLENDFACTOR_DST_ALPHA,
- BLENDFACTOR_DST_COLOR,
- BLENDFACTOR_SRC_ALPHA_SATURATE,
- BLENDFACTOR_CONST_COLOR,
- BLENDFACTOR_CONST_ALPHA,
- BLENDFACTOR_SRC1_COLOR,
- BLENDFACTOR_SRC1_ALPHA,
- BLENDFACTOR_ZERO,
- BLENDFACTOR_INV_SRC_COLOR,
- BLENDFACTOR_INV_SRC_ALPHA,
- BLENDFACTOR_INV_DST_ALPHA,
- BLENDFACTOR_INV_DST_COLOR,
- BLENDFACTOR_INV_CONST_COLOR,
- BLENDFACTOR_INV_CONST_ALPHA,
- BLENDFACTOR_INV_SRC1_COLOR,
- BLENDFACTOR_INV_SRC1_ALPHA
-};
-
-// Blend operation selectors, implicitly numbered 0..4 in declaration order.
-enum SWR_BLEND_OP
-{
- BLENDOP_ADD,
- BLENDOP_SUBTRACT,
- BLENDOP_REVSUBTRACT,
- BLENDOP_MIN,
- BLENDOP_MAX,
-};
-
-// Logic operation selectors, implicitly numbered 0..15 in declaration order
-// (LOGICOP_CLEAR == 0, LOGICOP_SET == 15).
-enum SWR_LOGIC_OP
-{
- LOGICOP_CLEAR,
- LOGICOP_NOR,
- LOGICOP_AND_INVERTED,
- LOGICOP_COPY_INVERTED,
- LOGICOP_AND_REVERSE,
- LOGICOP_INVERT,
- LOGICOP_XOR,
- LOGICOP_NAND,
- LOGICOP_AND,
- LOGICOP_EQUIV,
- LOGICOP_NOOP,
- LOGICOP_OR_INVERTED,
- LOGICOP_COPY,
- LOGICOP_OR_REVERSE,
- LOGICOP_OR,
- LOGICOP_SET,
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_AUX_MODE
-/// @brief Specifies how the auxiliary buffer is used by the driver.
-///        Values are implicit, starting at 0 with AUX_MODE_NONE.
-//////////////////////////////////////////////////////////////////////////
-enum SWR_AUX_MODE
-{
- AUX_MODE_NONE,
- AUX_MODE_COLOR,
- AUX_MODE_UAV,
- AUX_MODE_DEPTH,
-};
-
-// vertex fetch state
-// Describes one bound vertex buffer; SWR_FETCH_CONTEXT::pStreams points at an
-// array of these.
-// WARNING- any changes to this struct need to be reflected
-// in the fetch shader jit
-struct SWR_VERTEX_BUFFER_STATE
-{
- gfxptr_t xpData;
- uint32_t index;
- uint32_t pitch;
- uint32_t size;
- uint32_t minVertex; // min vertex (for bounds checking)
- uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks
- uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for
- // partially OOB vertices
-};
-
-// Describes the bound index buffer: its address, the format of each index and
-// a size. NOTE(review): the unit of `size` is not stated here - confirm.
-struct SWR_INDEX_BUFFER_STATE
-{
- gfxptr_t xpIndices;
- // Format type for indices (e.g. UINT16, UINT32, etc.)
- SWR_FORMAT format; // @llvm_enum
- uint32_t size;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_FETCH_CONTEXT
-/// @brief Input to fetch shader.
-/// @note WARNING - Changes to this struct need to be reflected in the
-/// fetch shader jit.
-/// @note Also carries outputs (VertexID, CutMask). When USE_SIMD16_SHADERS
-/// is set, a second pair (VertexID2, CutMask2) is added; presumably
-/// it covers the second half of a 16-wide batch - TODO confirm.
-/////////////////////////////////////////////////////////////////////////
-struct SWR_FETCH_CONTEXT
-{
- const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
- gfxptr_t xpIndices; // IN: pointer to int32 index buffer for indexed draws
- gfxptr_t xpLastIndex; // IN: pointer to end of index buffer, used for bounds checking
- uint32_t CurInstance; // IN: current instance
- uint32_t BaseVertex; // IN: base vertex
- uint32_t StartVertex; // IN: start vertex
- uint32_t StartInstance; // IN: start instance
- simdscalari VertexID; // OUT: vector of vertex IDs
- simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
-#if USE_SIMD16_SHADERS
- // simd16scalari VertexID; // OUT: vector of vertex IDs
- // simd16scalari CutMask; // OUT: vector mask of indices which have the
- // cut index value
- simdscalari VertexID2; // OUT: vector of vertex IDs
- simdscalari CutMask2; // OUT: vector mask of indices which have the cut index value
-#endif
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_STATS
-///
-/// @brief All statistics generated by SWR go here. These are public
-/// to driver. Front-end counters live in SWR_STATS_FE instead.
-/// All counters are 64-bit.
-/////////////////////////////////////////////////////////////////////////
-OSALIGNLINE(struct) SWR_STATS
-{
- // Occlusion Query
- uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
-
- // Pipeline Stats
- uint64_t PsInvocations; // Number of Pixel Shader invocations
- uint64_t CsInvocations; // Number of Compute Shader invocations
-
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_STATS_FE
-///
-/// @brief All statistics generated by FE.
-/// The two streamout arrays have four entries each, matching
-/// MAX_SO_STREAMS.
-/////////////////////////////////////////////////////////////////////////
-OSALIGNLINE(struct) SWR_STATS_FE
-{
- uint64_t IaVertices; // Number of Fetch Shader vertices
- uint64_t IaPrimitives; // Number of PA primitives.
- uint64_t VsInvocations; // Number of Vertex Shader invocations
- uint64_t HsInvocations; // Number of Hull Shader invocations
- uint64_t DsInvocations; // Number of Domain Shader invocations
- uint64_t GsInvocations; // Number of Geometry Shader invocations
- uint64_t GsPrimitives; // Number of prims GS outputs.
- uint64_t CInvocations; // Number of clipper invocations
- uint64_t CPrimitives; // Number of clipper primitives.
-
- // Streamout Stats
- uint64_t SoPrimStorageNeeded[4];
- uint64_t SoNumPrimsWritten[4];
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// STREAMOUT_BUFFERS
-/// Limits used by the streamout structures below. MAX_SO_STREAMS sizes
-/// every per-stream array in this chunk; MAX_SO_BUFFERS and MAX_ATTRIBUTES
-/// are not referenced within it.
-/////////////////////////////////////////////////////////////////////////
-
-#define MAX_SO_STREAMS 4
-#define MAX_SO_BUFFERS 4
-#define MAX_ATTRIBUTES 32
-
-// One streamout buffer binding. Sizes, pitch and offsets are in dwords.
-struct SWR_STREAMOUT_BUFFER
-{
- // Address of the streamout buffer.
- gfxptr_t pBuffer;
-
- // Offset to the SO write offset. If not null then we update offset here.
- gfxptr_t pWriteOffset;
-
- bool enable;
- bool soWriteEnable;
-
- // Size of buffer in dwords.
- uint32_t bufferSize;
-
- // Vertex pitch of buffer in dwords.
- uint32_t pitch;
-
- // Offset into buffer in dwords. SOS will increment this offset.
- uint32_t streamOffset;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// STREAMOUT_STATE
-/// @brief Stream output configuration; per-stream arrays have
-///        MAX_SO_STREAMS entries.
-/////////////////////////////////////////////////////////////////////////
-struct SWR_STREAMOUT_STATE
-{
- // Stream output enable flag.
- // NOTE(review): presumably false disables stream output - confirm.
- bool soEnable;
-
- // which streams are enabled for streamout
- bool streamEnable[MAX_SO_STREAMS];
-
- // If set then do not send any streams to the rasterizer.
- bool rasterizerDisable;
-
- // Specifies which stream to send to the rasterizer.
- uint32_t streamToRasterizer;
-
- // The stream masks specify which attributes are sent to which streams.
- // These masks help the FE to set up the pPrimData buffer that is passed
- // to the Stream Output Shader (SOS) function.
- uint64_t streamMasks[MAX_SO_STREAMS];
-
- // Number of attributes, including position, per vertex that are streamed out.
- // This should match number of bits in stream mask.
- uint32_t streamNumEntries[MAX_SO_STREAMS];
-
- // Offset to the start of the attributes of the input vertices, in simdvector units
- uint32_t vertexAttribOffset[MAX_SO_STREAMS];
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// STREAMOUT_CONTEXT - Passed to SOS
-/// NOTE(review): pBuffer is sized by MAX_SO_STREAMS rather than
-/// MAX_SO_BUFFERS; both are 4, so the size is the same either way.
-/////////////////////////////////////////////////////////////////////////
-struct SWR_STREAMOUT_CONTEXT
-{
- uint32_t* pPrimData;
- SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];
-
- // Num prims written for this stream
- uint32_t numPrimsWritten;
-
- // Num prims that should have been written if there were no overflow.
- uint32_t numPrimStorageNeeded;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_GS_STATE - Geometry shader state
-/// @note Units differ per field (attributes, simdvector units, simdscalar
-///       units, bytes); each field's comment states its own unit.
-/////////////////////////////////////////////////////////////////////////
-struct SWR_GS_STATE
-{
- bool gsEnable;
-
- // If true, geometry shader emits a single stream, with separate cut buffer.
- // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a
- // separate StreamID buffer to map vertices to streams
- bool isSingleStream;
-
- // Number of input attributes per vertex. Used by the frontend to
- // optimize assembling primitives for GS
- uint32_t numInputAttribs;
-
- // Stride of incoming verts in attributes
- uint32_t inputVertStride;
-
- // Output topology - can be point, tristrip, linestrip, or rectlist
- PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
-
- // Maximum number of verts that can be emitted by a single instance of the GS
- uint32_t maxNumVerts;
-
- // Instance count
- uint32_t instanceCount;
-
- // When single stream is enabled, singleStreamID dictates which stream is being output.
- // field ignored if isSingleStream is false
- uint32_t singleStreamID;
-
- // Total amount of memory to allocate for one instance of the shader output in bytes
- uint32_t allocationSize;
-
- // Offset to start reading data per input vertex in simdvector units. This can be used to
- // skip over any vertex data output from the previous stage that is unused in the GS, removing
- // unnecessary vertex processing.
- uint32_t vertexAttribOffset;
-
- // Size of the control data section which contains cut or streamID data, in simdscalar units.
- // Should be sized to handle the maximum number of verts output by the GS. Can be 0 if there are
- // no cuts or streamID bits.
- uint32_t controlDataSize;
-
- // Offset to the control data section, in bytes
- uint32_t controlDataOffset;
-
- // Total size of an output vertex, in simdvector units
- uint32_t outputVertexSize;
-
- // Offset to the start of the vertex section, in bytes
- uint32_t outputVertexOffset;
-
- // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero,
- // shader is expected to store the final vertex count in the first dword of the gs output
- // stream.
- uint32_t staticVertexCount;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
-/// Values are implicit from 0; SWR_TS_OUTPUT_TOPOLOGY_COUNT is last and
-/// therefore equals the number of topologies (4).
-/////////////////////////////////////////////////////////////////////////
-enum SWR_TS_OUTPUT_TOPOLOGY
-{
- SWR_TS_OUTPUT_POINT,
- SWR_TS_OUTPUT_LINE,
- SWR_TS_OUTPUT_TRI_CW,
- SWR_TS_OUTPUT_TRI_CCW,
-
- SWR_TS_OUTPUT_TOPOLOGY_COUNT
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_TS_PARTITIONING - Defines tessellation algorithm
-/// Values are implicit from 0; SWR_TS_PARTITIONING_COUNT is last and
-/// therefore equals the number of algorithms (3).
-/////////////////////////////////////////////////////////////////////////
-enum SWR_TS_PARTITIONING
-{
- SWR_TS_INTEGER,
- SWR_TS_ODD_FRACTIONAL,
- SWR_TS_EVEN_FRACTIONAL,
-
- SWR_TS_PARTITIONING_COUNT
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_TS_DOMAIN - Defines Tessellation Domain
-/// Values are implicit from 0; SWR_TS_DOMAIN_COUNT is last and therefore
-/// equals the number of domains (3).
-/////////////////////////////////////////////////////////////////////////
-enum SWR_TS_DOMAIN
-{
- SWR_TS_QUAD,
- SWR_TS_TRI,
- SWR_TS_ISOLINE,
-
- SWR_TS_DOMAIN_COUNT
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_TS_STATE - Tessellation state
-/// Groups the tessellator configuration (topology, partitioning, domain)
-/// with hull-shader and domain-shader attribute counts, allocation sizes
-/// and attribute offsets.
-/////////////////////////////////////////////////////////////////////////
-struct SWR_TS_STATE
-{
- bool tsEnable;
-
- SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
- SWR_TS_PARTITIONING partitioning; // @llvm_enum
- SWR_TS_DOMAIN domain; // @llvm_enum
-
- PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum
-
- uint32_t numHsInputAttribs;
- uint32_t numHsOutputAttribs;
- uint32_t hsAllocationSize; // Size of HS output in bytes, per lane
-
- uint32_t numDsOutputAttribs;
- uint32_t dsAllocationSize;
- uint32_t dsOutVtxAttribOffset;
-
- // Offset to the start of the attributes of the input vertices, in simdvector units
- uint32_t srcVertexAttribOffset;
-
- // Offset to the start of the attributes expected by the hull shader
- uint32_t vertexAttribOffset;
-};
-
-// output merger state
-// Four one-bit write-disable flags, one per color channel, packed into a
-// single byte (enforced by the static_assert below).
-struct SWR_RENDER_TARGET_BLEND_STATE
-{
- uint8_t writeDisableRed : 1;
- uint8_t writeDisableGreen : 1;
- uint8_t writeDisableBlue : 1;
- uint8_t writeDisableAlpha : 1;
-};
-static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1,
- "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
-
-// Sample-count selectors. Each value is the base-2 logarithm of the sample
-// count it names (1X = 0 ... 16X = 4); GetNumSamples() converts back with a
-// left shift. SWR_MULTISAMPLE_TYPE_COUNT is last and equals 5.
-enum SWR_MULTISAMPLE_COUNT
-{
- SWR_MULTISAMPLE_1X = 0,
- SWR_MULTISAMPLE_2X,
- SWR_MULTISAMPLE_4X,
- SWR_MULTISAMPLE_8X,
- SWR_MULTISAMPLE_16X,
- SWR_MULTISAMPLE_TYPE_COUNT
-};
-
-/// @brief Converts a sample-count selector into a number of samples (1 << selector).
-/// @param sampleCountEnum - selector value; not range-checked. The inline
-///        SWR_SAMPLE_COUNT annotation presumably refers to SWR_MULTISAMPLE_COUNT.
-/// @return 1 for 0, 2 for 1, 4 for 2, 8 for 3, 16 for 4.
-static INLINE uint32_t GetNumSamples(/* SWR_SAMPLE_COUNT */ int sampleCountEnum) // @llvm_func_start
-{
- return uint32_t(1) << sampleCountEnum;
-} // @llvm_func_end
-
-// Blend configuration: constant colour, alpha reference, sample mask/count and the per-render-target
-// write-disable bits. The size is pinned by the static_assert that follows the struct.
-struct SWR_BLEND_STATE
-{
- // constant blend factor color in RGBA float
- float constantColor[4];
-
- // alpha test reference value in unorm8 or float32
- uint32_t alphaTestReference;
- uint32_t sampleMask;
- // all RT's have the same sample count
- ///@todo move this to Output Merger state when we refactor
- SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
-
- SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
-};
-// 36 = 16 (constantColor) + 3 * 4 (two uint32_t and a 4-byte enum) + 8 one-byte renderTarget entries;
-// presumably SWR_NUM_RENDERTARGETS == 8 - confirm against its definition.
-static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
-
-// Argument block for a blend function: PFN_BLEND_JIT_FUNC (declared further down in this header)
-// takes a single SWR_BLEND_CONTEXT*. All members are raw pointers or plain integers; nothing in
-// this struct states who owns the pointed-to storage.
-struct SWR_BLEND_CONTEXT
-{
- const SWR_BLEND_STATE* pBlendState; // read-only blend state
- simdvector* src;
- simdvector* src1;
- simdvector* src0alpha;
- uint32_t sampleNum;
- simdvector* pDst;
- simdvector* result;
- simdscalari* oMask;
- simdscalari* pMask;
- uint32_t isAlphaTested; // NOTE(review): used as a flag by name, but stored as a full uint32_t
- uint32_t isAlphaBlended; // NOTE(review): same as above
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// FUNCTION POINTERS FOR SHADERS
-
-// Shader / kernel entry-point signatures. Every stage function except the blend and depth-quantize
-// ones receives two opaque HANDLEs (hPrivateData, hWorkerPrivateData) followed by a stage-specific
-// context, passed by reference for fetch and stream-out and by pointer for the other stages.
-// The fetch output type depends on USE_SIMD16_SHADERS (simd16vertex vs. simdvertex).
-#if USE_SIMD16_SHADERS
-typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
-#else
-typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
-#endif
-typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_VS_CONTEXT* pVsContext);
-typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_HS_CONTEXT* pHsContext);
-typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_DS_CONTEXT* pDsContext);
-typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_GS_CONTEXT* pGsContext);
-typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_CS_CONTEXT* pCsContext);
-typedef void(__cdecl *PFN_SO_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_STREAMOUT_CONTEXT& soContext);
-// PFN_PIXEL_KERNEL and PFN_CPIXEL_KERNEL have identical signatures; only the typedef names differ.
-typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
-typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
-typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(SWR_BLEND_CONTEXT*);
-// Unlike the typedefs above, this one carries no explicit __cdecl.
-typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
-
-
-//////////////////////////////////////////////////////////////////////////
-/// FRONTEND_STATE
-/////////////////////////////////////////////////////////////////////////
-// Front-end configuration: viewport-transform bypass, cut-index enable, provoking-vertex selection
-// and the vertex size used by the vertex shader stage.
-struct SWR_FRONTEND_STATE
-{
- // skip clip test, perspective divide, and viewport transform
- // intended for verts in screen space
- bool vpTransformDisable;
- bool bEnableCutIndex;
- // Per-topology provoking-vertex selectors (5 bits in total) that can also be read or written
- // all at once through the aliasing 'bits' member.
- union
- {
- struct
- {
- uint32_t triFan : 2;
- uint32_t lineStripList : 1;
- uint32_t triStripList : 2;
- };
- uint32_t bits;
- } provokingVertex;
- uint32_t topologyProvokingVertex; // provoking vertex for the draw topology
-
- // Size of a vertex in simdvector units. Should be sized to the
- // maximum of the input/output of the vertex shader.
- uint32_t vsVertexSize;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// VIEWPORT_MATRIX
-/////////////////////////////////////////////////////////////////////////
-// Six floats named after matrix positions: the diagonal entries m00/m11/m22 and the fourth-row
-// entries m30/m31/m32. No other matrix element is stored.
-// NOTE(review): presumably the scale (diagonal) and translate (row 3) terms of a viewport
-// transform - confirm with the code that fills this in.
-struct SWR_VIEWPORT_MATRIX
-{
- float m00;
- float m11;
- float m22;
- float m30;
- float m31;
- float m32;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_VIEWPORT_MATRICES
-/////////////////////////////////////////////////////////////////////////
-// Structure-of-arrays counterpart of SWR_VIEWPORT_MATRIX: the same six named elements, each stored
-// as an array with KNOB_NUM_VIEWPORTS_SCISSORS entries (element [i] of every array belongs to slot i).
-struct SWR_VIEWPORT_MATRICES
-{
- float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
- float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
- float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
- float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
- float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
- float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_VIEWPORT
-/////////////////////////////////////////////////////////////////////////
-// Viewport description: a rectangle (x, y, width, height) and a depth range (minZ, maxZ), all floats.
-// NOTE(review): units and origin convention are not stated here - confirm with the callers.
-struct SWR_VIEWPORT
-{
- float x;
- float y;
- float width;
- float height;
- float minZ;
- float maxZ;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_CULLMODE
-//////////////////////////////////////////////////////////////////////////
-// Face culling selector. The four implicit values (0..3) fit the 2-bit SWR_RASTSTATE::cullMode field.
-enum SWR_CULLMODE
-{
- SWR_CULLMODE_BOTH, // 0
- SWR_CULLMODE_NONE, // 1
- SWR_CULLMODE_FRONT, // 2
- SWR_CULLMODE_BACK // 3
-};
-
-// Polygon fill selector. The three implicit values (0..2) fit the 2-bit SWR_RASTSTATE::fillMode field.
-enum SWR_FILLMODE
-{
- SWR_FILLMODE_POINT, // 0
- SWR_FILLMODE_WIREFRAME, // 1
- SWR_FILLMODE_SOLID // 2
-};
-
-// Winding order that counts as front-facing. Two values (0, 1) fit the 1-bit
-// SWR_RASTSTATE::frontWinding field.
-enum SWR_FRONTWINDING
-{
- SWR_FRONTWINDING_CW, // 0
- SWR_FRONTWINDING_CCW // 1
-};
-
-
-// Pixel reference point selector.
-// NOTE(review): SWR_RASTSTATE::pixelLocation is a plain uint32_t commented "UL or Center";
-// presumably it holds one of these values - confirm.
-enum SWR_PIXEL_LOCATION
-{
- SWR_PIXEL_LOCATION_CENTER, // 0
- SWR_PIXEL_LOCATION_UL, // 1
-};
-
-// fixed point screen space sample locations within a pixel
-// fixed point screen space sample locations within a pixel
-//
-// Holds per-sample positions twice: as scalars (_xi/_yi integer, _x/_y float) and as vectors
-// (_vXi/_vYi/_vX/_vY), plus two __m128i tile sample offsets. The scalar setters only write the
-// scalar arrays; the vector members are written by PrecalcSampleData(). The three non-inline
-// members are only declared here; the diff shows their definitions in core/state_funcs.h.
-// None of the accessors range-check sampleNum against SWR_MAX_NUM_MULTISAMPLES.
-// NOTE(review): the "@llvm_func" / "@llvm_typedef" tags look like codegen markers - leave them intact.
-struct SWR_MULTISAMPLE_POS
-{
-public:
- INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
- INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
- INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
- INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
- INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
- INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
- INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
- INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
- // Reference-to-array type, so the X()/Y() overloads below expose the whole float array without decay.
- typedef const float (&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
- INLINE sampleArrayT X() const { return _x; }; // @llvm_func
- INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
- INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
- INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
- INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
- INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
- INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
- INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
-
- INLINE void PrecalcSampleData(int numSamples); //@llvm_func
-
-private:
- template <typename MaskT>
- INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max); // @llvm_func
- INLINE void CalcTileSampleOffsets(int numSamples); // @llvm_func
-
- // scalar sample values
- uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
- uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
- float _x[SWR_MAX_NUM_MULTISAMPLES];
- float _y[SWR_MAX_NUM_MULTISAMPLES];
-
- // precalc'd / vectorized samples
- __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
- __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
- simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
- simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
- __m128i tileSampleOffsetsX;
- __m128i tileSampleOffsetsY;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_RASTSTATE
-//////////////////////////////////////////////////////////////////////////
-// Rasterizer configuration. The leading bit-fields use 16 bits in total (2 + 2 + twelve 1-bit flags);
-// they are followed by point/line sizes, depth-bias parameters and the multisample setup.
-struct SWR_RASTSTATE
-{
- uint32_t cullMode : 2; // wide enough for the four SWR_CULLMODE values
- uint32_t fillMode : 2; // wide enough for the three SWR_FILLMODE values
- uint32_t frontWinding : 1; // wide enough for the two SWR_FRONTWINDING values
- uint32_t scissorEnable : 1;
- uint32_t depthClipEnable : 1;
- uint32_t clipEnable : 1;
- uint32_t clipHalfZ : 1;
- uint32_t pointParam : 1;
- uint32_t pointSpriteEnable : 1;
- uint32_t pointSpriteTopOrigin : 1;
- uint32_t forcedSampleCount : 1;
- uint32_t pixelOffset : 1;
- uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
- uint32_t conservativeRast : 1;
-
- float pointSize;
- float lineWidth;
-
- float depthBias;
- float slopeScaledDepthBias;
- float depthBiasClamp;
- SWR_FORMAT depthFormat; // @llvm_enum
-
- // sample count the rasterizer is running at
- SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
- uint32_t pixelLocation; // UL or Center
- SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct
- // NOTE(review): the member below is a bool yet carries the "@llvm_enum" tag used for enum
- // members elsewhere in this header - check whether that tag is intended.
- bool bIsCenterPattern; // @llvm_enum
-};
-
-
-// Constant sources for attribute swizzling. The four implicit values (0..3) fit the 2-bit
-// SWR_ATTRIB_SWIZZLE::constantSource field.
-enum SWR_CONSTANT_SOURCE
-{
- SWR_CONSTANT_SOURCE_CONST_0000, // 0
- SWR_CONSTANT_SOURCE_CONST_0001_FLOAT, // 1
- SWR_CONSTANT_SOURCE_CONST_1111_FLOAT, // 2
- SWR_CONSTANT_SOURCE_PRIM_ID // 3
-};
-
-// One swizzle-map entry: 5 + 2 + 4 = 11 bits of bit-fields declared on a uint16_t.
-// SWR_BACKEND_STATE::swizzleMap holds 32 of these.
-struct SWR_ATTRIB_SWIZZLE
-{
- uint16_t sourceAttrib : 5; // source attribute
- uint16_t constantSource : 2; // constant source to apply
- uint16_t componentOverrideMask : 4; // override component with constant source
-};
-
-// backend state
-// Layout note (assuming 1-byte bool/uint8_t and a 2-byte SWR_ATTRIB_SWIZZLE): the members before
-// 'pad' occupy bytes 0..45, pad[10] fills 46..55, the two uint32_t offsets sit at bytes 56 and 60,
-// and swizzleMap[32] covers bytes 64..127 - i.e. the 128 bytes demanded by the static_assert below.
-// Adding or resizing a member therefore requires adjusting 'pad'.
-struct SWR_BACKEND_STATE
-{
- uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant
- // interpolation
- uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be
- // interpreted as tex coordinates
-
- bool swizzleEnable; // when enabled, core will parse the swizzle map when
- // setting up attributes for the backend, otherwise
- // all attributes up to numAttributes will be sent
- uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
- uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some
- // calculations for unneeded components
-
- bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the
- // backend
- bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
-
- // User clip/cull distance enables
- uint8_t cullDistanceMask;
- uint8_t clipDistanceMask;
-
- // padding to ensure swizzleMap starts 64B offset from start of the struct
- // and that the next fields are dword aligned.
- uint8_t pad[10];
-
- // Offset to the start of the attributes of the input vertices, in simdvector units
- uint32_t vertexAttribOffset;
-
- // Offset to clip/cull attrib section of the vertex, in simdvector units
- uint32_t vertexClipCullOffset;
-
- SWR_ATTRIB_SWIZZLE swizzleMap[32];
-};
-static_assert(sizeof(SWR_BACKEND_STATE) == 128,
- "Adjust padding to keep size (or remove this assert)");
-
-
-// Depth/stencil configuration, viewable either as named fields or as three raw dwords (value[3]).
-// The dword-0 bit-fields add up to exactly 32 bits (5 one-bit enables + 9 three-bit selectors).
-// Dword 2 only names two bytes; its remaining two bytes have no named field.
-union SWR_DEPTH_STENCIL_STATE
-{
- struct
- {
- // dword 0
- uint32_t depthWriteEnable : 1;
- uint32_t depthTestEnable : 1;
- uint32_t stencilWriteEnable : 1;
- uint32_t stencilTestEnable : 1;
- uint32_t doubleSidedStencilTestEnable : 1;
-
- uint32_t depthTestFunc : 3;
- uint32_t stencilTestFunc : 3;
-
- uint32_t backfaceStencilPassDepthPassOp : 3;
- uint32_t backfaceStencilPassDepthFailOp : 3;
- uint32_t backfaceStencilFailOp : 3;
- uint32_t backfaceStencilTestFunc : 3;
- uint32_t stencilPassDepthPassOp : 3;
- uint32_t stencilPassDepthFailOp : 3;
- uint32_t stencilFailOp : 3;
-
- // dword 1
- uint8_t backfaceStencilWriteMask;
- uint8_t backfaceStencilTestMask;
- uint8_t stencilWriteMask;
- uint8_t stencilTestMask;
-
- // dword 2
- uint8_t backfaceStencilRefValue;
- uint8_t stencilRefValue;
- };
- uint32_t value[3]; // raw view of the three dwords above
-};
-
-// Pixel-shader invocation rate. Values fit the 2-bit SWR_PS_STATE::shadingRate field.
-enum SWR_SHADING_RATE
-{
- SWR_SHADING_RATE_PIXEL, // 0
- SWR_SHADING_RATE_SAMPLE, // 1
- SWR_SHADING_RATE_COUNT, // sentinel: number of rates above (2)
-};
-
-// Input-coverage mode. Values fit the 2-bit SWR_PS_STATE::inputCoverage field.
-enum SWR_INPUT_COVERAGE
-{
- SWR_INPUT_COVERAGE_NONE, // 0
- SWR_INPUT_COVERAGE_NORMAL, // 1
- SWR_INPUT_COVERAGE_INNER_CONSERVATIVE, // 2
- SWR_INPUT_COVERAGE_COUNT, // sentinel: number of modes above (3)
-};
-
-// Kind of offset added to the pixel position (see the comment on SWR_PS_STATE::posOffset).
-// Values fit that 2-bit field.
-enum SWR_PS_POSITION_OFFSET
-{
- SWR_PS_POSITION_SAMPLE_NONE, // 0
- SWR_PS_POSITION_SAMPLE_OFFSET, // 1
- SWR_PS_POSITION_CENTROID_OFFSET, // 2
- SWR_PS_POSITION_OFFSET_COUNT, // sentinel: number of kinds above (3)
-};
-
-// Bit flags (one bit each, so they can be OR-ed together); the three bits match the width of
-// the 3-bit SWR_PS_STATE::barycentricsMask field.
-enum SWR_BARYCENTRICS_MASK
-{
- SWR_BARYCENTRIC_PER_PIXEL_MASK = 0x1,
- SWR_BARYCENTRIC_CENTROID_MASK = 0x2,
- SWR_BARYCENTRIC_PER_SAMPLE_MASK = 0x4,
-};
-
-// pixel shader state
-// Pixel-shader entry point plus 14 bits of behaviour flags and a render-target write mask.
-// The "dword" comments assume an 8-byte function pointer.
-struct SWR_PS_STATE
-{
- // dword 0-1
- PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
-
- // dword 2
- uint32_t killsPixel : 1; // pixel shader can kill pixels
- uint32_t inputCoverage : 2; // ps uses input coverage
- uint32_t writesODepth : 1; // pixel shader writes to depth
- uint32_t usesSourceDepth : 1; // pixel shader reads depth
- // NOTE(review): the comment below mentions "coarse pixel", but SWR_SHADING_RATE as declared in
- // this header only has PIXEL and SAMPLE.
- uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
- uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
- uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate
- // attributes with
- uint32_t usesUAV : 1; // pixel shader accesses UAV
- uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
-
- uint8_t renderTargetMask; // Mask of render targets written
-};
-
-// depth bounds state
-// Depth-bounds test configuration: an enable flag and the [min, max] pair it applies to.
-// NOTE(review): whether the bounds are inclusive is not stated here - confirm with the backend.
-struct SWR_DEPTH_BOUNDS_STATE
-{
- bool depthBoundsTestEnable;
- float depthBoundsTestMinValue;
- float depthBoundsTestMaxValue;
-};
-// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/core/state_funcs.h b/src/gallium/drivers/swr/rasterizer/core/state_funcs.h
deleted file mode 100644
index 99eac835ea8..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/state_funcs.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file state_funcs.h
- *
- * @brief Definitions for API state - complex function implementation.
- *
- ******************************************************************************/
-#pragma once
-
-#include "core/state.h"
-#include "common/simdintrin.h"
-
-// Broadcasts *min and *max to all four 32-bit lanes and blends the two vectors with the
-// compile-time mask MaskT::value (first operand: the *min broadcast, second: the *max broadcast).
-template <typename MaskT>
-INLINE __m128i SWR_MULTISAMPLE_POS::expandThenBlend4(uint32_t* min, uint32_t* max)
-{
-    return _simd_blend4_epi32<MaskT::value>(_mm_set1_epi32(*min), _mm_set1_epi32(*max));
-}
-
-// Fills the vector members from the scalar sample arrays for the first numSamples samples,
-// then recomputes the tile sample offsets.
-INLINE void SWR_MULTISAMPLE_POS::PrecalcSampleData(int numSamples)
-{
-    for (int sample = 0; sample < numSamples; ++sample)
-    {
-        // float positions, broadcast across a simdscalar
-        _vX[sample] = _simd_set1_ps(_x[sample]);
-        _vY[sample] = _simd_set1_ps(_y[sample]);
-
-        // integer positions, broadcast across an __m128i
-        _vXi[sample] = _mm_set1_epi32(_xi[sample]);
-        _vYi[sample] = _mm_set1_epi32(_yi[sample]);
-    }
-
-    // precalculate the raster tile BB for the rasterizer.
-    CalcTileSampleOffsets(numSamples);
-}
-
-// Computes tileSampleOffsetsX / tileSampleOffsetsY from the smallest and largest integer sample
-// coordinate among the first numSamples samples. Each result holds min or max per 32-bit lane,
-// chosen by a 4-bit blend mask.
-//
-// The lane comments list lanes from mask bit 3 down to bit 0 and assume that a set mask bit picks
-// the second (max) operand, as _mm_blend_epi32 does - TODO confirm against _simd_blend4_epi32.
-INLINE void SWR_MULTISAMPLE_POS::CalcTileSampleOffsets(int numSamples)
-{
-    // One pass per axis instead of separate min_element / max_element scans; only the pointed-to
-    // values are used, so which of several equal extrema is returned does not matter.
-    const auto xRange = std::minmax_element(std::begin(_xi), &_xi[numSamples]);
-    using xMask = std::integral_constant<int, 0xA>;
-    // 0xA = 1010b: BR(max), BL(min), UR(max), UL(min)
-    tileSampleOffsetsX = expandThenBlend4<xMask>(xRange.first, xRange.second);
-
-    const auto yRange = std::minmax_element(std::begin(_yi), &_yi[numSamples]);
-    using yMask = std::integral_constant<int, 0xC>;
-    // 0xC = 1100b: BR(max), BL(max), UR(min), UL(min)
-    tileSampleOffsetsY = expandThenBlend4<yMask>(yRange.first, yRange.second);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/tessellator.cpp b/src/gallium/drivers/swr/rasterizer/core/tessellator.cpp
deleted file mode 100644
index 08f2bce339c..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/tessellator.cpp
+++ /dev/null
@@ -1,2689 +0,0 @@
-/*
- Copyright (c) Microsoft Corporation
-
- Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
- associated documentation files (the "Software"), to deal in the Software without restriction,
- including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
- and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
- subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
- NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#include "tessellator.hpp"
-#if defined(_MSC_VER)
-#include <math.h> // ceil
-#else
-#include <cmath>
-#endif
-#include <string.h> // memcpy
-//#include <windows.h> // Just used for some commented out debug stat printing.
-//#include <strsafe.h> // Ditto.
-// Function-like min/max. Arguments are fully parenthesized so that operands containing
-// lower-precedence operators (|, &, ?:, ...) compare as written. Each argument may still be
-// evaluated twice, so do not pass expressions with side effects.
-#define min(x,y) ((x) < (y) ? (x) : (y))
-#define max(x,y) ((x) > (y) ? (x) : (y))
-
-//=================================================================================================================================
-// Some D3D Compliant Float Math (reference rasterizer implements these in RefALU class)
-//=================================================================================================================================
-//
-//---------------------------------------------------------------------------------------------------------------------------------
-// isNaN
-//---------------------------------------------------------------------------------------------------------------------------------
-// Returns true when 'a' has the IEEE-754 single-precision NaN encoding: exponent bits all ones
-// and a non-zero mantissa. Infinities (all-ones exponent, zero mantissa) return false.
-static bool tess_isNaN( float a )
-{
-    static const unsigned int exponentMask = 0x7f800000u;
-    static const unsigned int mantissaMask = 0x007fffffu;
-    // Copy the bit pattern out with memcpy; reading the float through an int* breaks strict aliasing.
-    unsigned int u;
-    memcpy( &u, &a, sizeof( u ) );
-    return ( ( u & exponentMask ) == exponentMask ) && ( ( u & mantissaMask ) != 0 ); // NaN
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// flush (denorm)
-//---------------------------------------------------------------------------------------------------------------------------------
-// Flushes denormals to zero: if the magnitude of 'a' is below the smallest normalized float
-// (this includes +/-0), returns a zero carrying the sign of 'a'; otherwise returns 'a' unchanged.
-// NaN and infinity have larger magnitude bit patterns and therefore pass through.
-static float tess_flush( float a )
-{
-    static const unsigned int minNormalizedFloat = 0x00800000u;
-    static const unsigned int signBit = 0x80000000u;
-    // memcpy instead of pointer casts: type-punning through int* violates strict aliasing.
-    unsigned int bits;
-    memcpy( &bits, &a, sizeof( bits ) );
-    if( ( bits & ~signBit ) < minNormalizedFloat ) // unsigned comparison on |a|
-    {
-        const unsigned int signedZeroBits = bits & signBit;
-        float flushed;
-        memcpy( &flushed, &signedZeroBits, sizeof( flushed ) );
-        return flushed;
-    }
-    return a;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// IEEE754R min
-//---------------------------------------------------------------------------------------------------------------------------------
-// Minimum of a and b, compared after denormal flushing (tess_flush); the returned value is always
-// one of the original, unflushed arguments.
-//  - b is NaN                 -> a
-//  - both flush to zero       -> a if a's flushed sign bit is set (prefers -0), otherwise b
-//  - otherwise                -> a if flushed a < flushed b, else b (so a NaN 'a' yields b)
-static float tess_fmin( float a, float b )
-{
-    const float flushedA = tess_flush( a );
-    const float flushedB = tess_flush( b );
-    if( tess_isNaN( flushedB ) )
-    {
-        return a;
-    }
-    if( ( flushedA == 0 ) && ( flushedB == 0 ) )
-    {
-        // Read the sign bit via memcpy; casting float* to int* violates strict aliasing.
-        unsigned int bitsA;
-        memcpy( &bitsA, &flushedA, sizeof( bitsA ) );
-        return ( bitsA & 0x80000000u ) ? a : b;
-    }
-    return flushedA < flushedB ? a : b;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// IEEE754R max
-//---------------------------------------------------------------------------------------------------------------------------------
-// Maximum of a and b, compared after denormal flushing (tess_flush); the returned value is always
-// one of the original, unflushed arguments.
-//  - b is NaN                 -> a
-//  - both flush to zero       -> a if b's flushed sign bit is set (avoids -0), otherwise b
-//  - otherwise                -> a if flushed a >= flushed b, else b (so a NaN 'a' yields b)
-static float tess_fmax( float a, float b )
-{
-    const float flushedA = tess_flush( a );
-    const float flushedB = tess_flush( b );
-    if( tess_isNaN( flushedB ) )
-    {
-        return a;
-    }
-    if( ( flushedA == 0 ) && ( flushedB == 0 ) )
-    {
-        // Read the sign bit via memcpy; casting float* to int* violates strict aliasing.
-        unsigned int bitsB;
-        memcpy( &bitsB, &flushedB, sizeof( bitsB ) );
-        return ( bitsB & 0x80000000u ) ? a : b;
-    }
-    return flushedA >= flushedB ? a : b;
-}
-
-//=================================================================================================================================
-// Fixed Point Math
-//=================================================================================================================================
-
-//-----------------------------------------------------------------------------------------------------------------------------
-// floatToFixedPoint
-//
-// Convert 32-bit float to 32-bit fixed point integer, using only
-// integer arithmetic + bitwise operations.
-//
-// c_uIBits: UINT8 : Width of i (aka. integer bits)
-// c_uFBits: UINT8 : Width of f (aka. fractional bits)
-// c_bSigned: bool : Whether the integer bits are a 2's complement signed value
-// input: float : All values valid.
-// output: INT32 : At most 24 bits from LSB are meaningful, depending
-// on the fixed point bit representation chosen (see
-// below). Extra bits are sign extended from the most
-// meaningful bit.
-//
-//-----------------------------------------------------------------------------------------------------------------------------
-
-// Converts 'input' to a fixed-point INT32 with c_uIBits integer and c_uFBits fractional bits.
-// Decision order, as coded below: NaN -> 0; too large positive -> c_iMaxResult; too large
-// negative -> c_iMinResult; magnitude too small -> 0; otherwise the mantissa is shifted into
-// place, rounding when bits are dropped and negating for negative inputs.
-typedef unsigned char UINT8;
-typedef int INT32;
-template< const UINT8 c_uIBits, const UINT8 c_uFBits, const bool c_bSigned >
-INT32 floatToIDotF( const float& input )
-{
- // ------------------------------------------------------------------------
- // output fixed point format
- // 32-bit result:
- //
- // [sign-extend]i.f
- // | |
- // MSB(31)...LSB(0)
- //
- // f fractional part of the number, an unsigned
- // value with _fxpFracBitCount bits (defined below)
- //
- // . implied decimal
- //
- // i integer part of the number, a 2's complement
- // value with _fxpIntBitCount bits (defined below)
- //
- // [sign-extend] MSB of i conditionally replicated
- //
- // ------------------------------------------------------------------------
- // Define fixed point bit counts
- //
-
- // Commenting out C_ASSERT below to minimise #includes:
- // C_ASSERT( 2 <= c_uIBits && c_uIBits <= 32 && c_uFBits <= 32 && c_uIBits + c_uFBits <= 32 );
-
- // Define most negative and most positive fixed point values
- // NOTE(review): in the signed case this left-shifts the negative value INT32( -1 ), which is
- // undefined behaviour before C++20 - worth confirming the targeted language standard.
- const INT32 c_iMinResult = (c_bSigned ? INT32( -1 ) << (c_uIBits + c_uFBits - 1) : 0);
- const INT32 c_iMaxResult = ~c_iMinResult;
-
- // ------------------------------------------------------------------------
- // constant float properties
- // ------------------------------------------------------------------------
- const UINT8 _fltMantissaBitCount = 23;
- const UINT8 _fltExponentBitCount = 8;
- const INT32 _fltExponentBias = (INT32( 1 ) << (_fltExponentBitCount - 1)) - 1;
- const INT32 _fltHiddenBit = INT32( 1 ) << _fltMantissaBitCount;
- const INT32 _fltMantissaMask = _fltHiddenBit - 1;
- const INT32 _fltExponentMask = ((INT32( 1 ) << _fltExponentBitCount) - 1) << _fltMantissaBitCount;
- const INT32 _fltSignBit = INT32( 1 ) << (_fltExponentBitCount + _fltMantissaBitCount);
-
- // ------------------------------------------------------------------------
- // define min and max values as floats (clamp to these bounds)
- // ------------------------------------------------------------------------
- INT32 _fxpMaxPosValueFloat;
- INT32 _fxpMaxNegValueFloat;
-
- if (c_bSigned)
- {
- // The maximum positive fixed point value is 2^(i-1) - 2^(-f).
- // The following constructs the floating point bit pattern for this value,
- // as long as i >= 2.
- _fxpMaxPosValueFloat = (_fltExponentBias + c_uIBits - 1) <<_fltMantissaBitCount;
- const INT32 iShift = _fltMantissaBitCount + 2 - c_uIBits - c_uFBits;
- if (iShift >= 0)
- {
-// assert( iShift < 32 );
-#if defined(_MSC_VER)
-#pragma warning( suppress : 4293 )
-#endif
- _fxpMaxPosValueFloat -= INT32( 1 ) << iShift;
- }
-
- // The maximum negative fixed point value is -2^(i-1).
- // The following constructs the floating point bit pattern for this value,
- // as long as i >= 2.
- // We need this number without the sign bit
- _fxpMaxNegValueFloat = (_fltExponentBias + c_uIBits - 1) << _fltMantissaBitCount;
- }
- else
- {
- // The maximum positive fixed point value is 2^(i) - 2^(-f).
- // The following constructs the floating point bit pattern for this value,
- // as long as i >= 2.
- _fxpMaxPosValueFloat = (_fltExponentBias + c_uIBits) <<_fltMantissaBitCount;
- const INT32 iShift = _fltMantissaBitCount + 1 - c_uIBits - c_uFBits;
- if (iShift >= 0)
- {
-// assert( iShift < 32 );
-#if defined(_MSC_VER)
-#pragma warning( suppress : 4293 )
-#endif
- _fxpMaxPosValueFloat -= INT32( 1 ) << iShift;
- }
-
- // The maximum negative fixed point value is 0.
- _fxpMaxNegValueFloat = 0;
- }
-
- // ------------------------------------------------------------------------
- // float -> fixed conversion
- // ------------------------------------------------------------------------
-
- // ------------------------------------------------------------------------
- // examine input float
- // ------------------------------------------------------------------------
- // NOTE(review): reads the float's bit pattern through an INT32 pointer, which violates the
- // strict-aliasing rule; a memcpy into an INT32 would be the conforming way to get the bits.
- INT32 output = *(INT32*)&input;
- INT32 unbiasedExponent = ((output & _fltExponentMask) >> _fltMantissaBitCount) - _fltExponentBias;
- INT32 isNegative = output & _fltSignBit;
-
- // ------------------------------------------------------------------------
- // nan
- // ------------------------------------------------------------------------
- if (unbiasedExponent == (_fltExponentBias + 1) && (output & _fltMantissaMask))
- {
- // nan converts to 0
- output = 0;
- }
- // ------------------------------------------------------------------------
- // too large positive
- // ------------------------------------------------------------------------
- else if (!isNegative && output >= _fxpMaxPosValueFloat) // integer compare
- {
- output = c_iMaxResult;
- }
- // ------------------------------------------------------------------------
- // too large negative
- // ------------------------------------------------------------------------
- // integer compare
- else if (isNegative && (output & ~_fltSignBit) >= _fxpMaxNegValueFloat)
- {
- output = c_iMinResult;
- }
- // ------------------------------------------------------------------------
- // too small
- // ------------------------------------------------------------------------
- // c_uFBits is a UINT8 and is promoted to int before the unary minus, so the right-hand
- // side is an ordinary negative int.
- else if (unbiasedExponent < -c_uFBits - 1)
- {
- // clamp to 0
- output = 0;
- }
- // ------------------------------------------------------------------------
- // within range
- // ------------------------------------------------------------------------
- else
- {
- // copy mantissa, add hidden bit
- output = (output & _fltMantissaMask) | _fltHiddenBit;
-
- INT32 extraBits = _fltMantissaBitCount - c_uFBits - unbiasedExponent;
- if (extraBits >= 0)
- {
- // 2's complement if negative
- if (isNegative)
- {
- output = ~output + 1;
- }
-
- // From the range checks that led here, it is known that
- // unbiasedExponent < c_uIBits. So, at most:
- // (a) unbiasedExponent == c_uIBits - 1.
- //
- // From compile validation above, it is known that
- // c_uIBits + c_uFBits <= _fltMantissaBitCount + 1).
- // So, at minimum:
- // (b) _fltMantissaBitCount == _fxtIntBitCount + c_uFBits - 1
- //
- // Substituting (a) and (b) into extraBits calculation above:
- // extraBits >= (_fxtIntBitCount + c_uFBits - 1)
- // - c_uFBits - (c_uIBits - 1)
- // extraBits >= 0
- //
- // Thus we only have to worry about shifting right by 0 or more
- // bits to get the decimal to the right place, and never have
- // to shift left.
-
- INT32 LSB = 1 << extraBits; // last bit being kept
- INT32 extraBitsMask = LSB - 1;
- INT32 half = LSB >> 1; // round bias
-
- // round to nearest-even at LSB
- if ((output & LSB) || (output & extraBitsMask) > half)
- {
- output += half;
- }
-
- // shift off the extra bits (sign extending)
- output >>= extraBits;
- }
- else
- {
- output <<= -extraBits;
-
- // 2's complement if negative
- if (isNegative)
- {
- output = ~output + 1;
- }
- }
- }
- return output;
-}
-//-----------------------------------------------------------------------------------------------------------------------------
-
-// Fixed-point layout used by the tessellator: 15 integer bits above 16 fraction bits, so 1.0 is
-// 1 << 16 (FXP_ONE). The two masks select the fraction (low 16 bits) and the integer part (bits 16..30).
-#define FXP_INTEGER_BITS 15
-#define FXP_FRACTION_BITS 16
-#define FXP_FRACTION_MASK 0x0000ffff
-#define FXP_INTEGER_MASK 0x7fff0000
-#define FXP_THREE (3<<FXP_FRACTION_BITS)
-#define FXP_ONE (1<<FXP_FRACTION_BITS)
-#define FXP_ONE_THIRD 0x00005555 // 21845/65536, i.e. 1/3 truncated to 16 fraction bits
-#define FXP_TWO_THIRDS 0x0000aaaa // 43690/65536, i.e. 2/3 truncated to 16 fraction bits
-#define FXP_ONE_HALF 0x00008000 // 32768/65536
-
-#define FXP_MAX_INPUT_TESS_FACTOR_BEFORE_TRIPLE_AVERAGE 0x55540000 // 1/3 of max fixed point number - 1. Numbers less than
- // or equal to this allows avg. reduction on a tri patch
- // including rounding.
-
-#define FXP_MAX_INPUT_TESS_FACTOR_BEFORE_PAIR_AVERAGE 0x7FFF0000 // 1/2 of max fixed point number - 1. Numbers less than
- // or equal to this allows avg. reduction on a quad patch
- // including rounding.
-
-// Lookup table: entry n holds 1/n with 16 fraction bits (1/1 = 0x10000, 1/2 = 0x8000, ...).
-// Entry 0 is a placeholder. The initializer lists 65 values (indices 0..64), which matches the
-// declared size only if D3D11_TESSELLATOR_MAX_TESSELLATION_FACTOR is 64 - as the last comment implies.
-static const FXP s_fixedReciprocal[D3D11_TESSELLATOR_MAX_TESSELLATION_FACTOR+1] =
-{
- 0xffffffff, // 1/0 is the first entry (unused)
- 0x10000, 0x8000, 0x5555, 0x4000,
- 0x3333, 0x2aab, 0x2492, 0x2000,
- 0x1c72, 0x199a, 0x1746, 0x1555,
- 0x13b1, 0x1249, 0x1111, 0x1000,
- 0xf0f, 0xe39, 0xd79, 0xccd,
- 0xc31, 0xba3, 0xb21, 0xaab,
- 0xa3d, 0x9d9, 0x97b, 0x925,
- 0x8d4, 0x889, 0x842, 0x800,
- 0x7c2, 0x788, 0x750, 0x71c,
- 0x6eb, 0x6bd, 0x690, 0x666,
- 0x63e, 0x618, 0x5f4, 0x5d1,
- 0x5b0, 0x591, 0x572, 0x555,
- 0x539, 0x51f, 0x505, 0x4ec,
- 0x4d5, 0x4be, 0x4a8, 0x492,
- 0x47e, 0x46a, 0x457, 0x444,
- 0x432, 0x421, 0x410, 0x400, // 1/64 is the last entry
-};
-
-#define FLOAT_THREE 3.0f
-#define FLOAT_ONE 1.0f
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// floatToFixed
-//---------------------------------------------------------------------------------------------------------------------------------
-// Converts a float to the tessellator's fixed-point format by instantiating floatToIDotF with
-// FXP_INTEGER_BITS integer bits, FXP_FRACTION_BITS fraction bits and the unsigned variant.
-FXP floatToFixed(const float& input)
-{
-    const FXP fixedValue =
-        floatToIDotF< FXP_INTEGER_BITS, FXP_FRACTION_BITS, /*bSigned*/false >( input );
-    return fixedValue;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// fixedToFloat
-//---------------------------------------------------------------------------------------------------------------------------------
-// Converts a fixed-point value back to float: integer part plus fraction / 2^FXP_FRACTION_BITS.
-float fixedToFloat(const FXP& input)
-{
-    // not worrying about denorm flushing the float operations (the DX spec behavior for div), since the numbers will not be that small during tessellation.
-    const float wholePart    = (float)(input>>FXP_FRACTION_BITS);
-    const float fractionPart = (float)(input&FXP_FRACTION_MASK)/(1<<FXP_FRACTION_BITS);
-    return (wholePart + fractionPart);
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// isEven
-//---------------------------------------------------------------------------------------------------------------------------------
-// True when 'input', truncated toward zero to an int, has its lowest bit clear.
-bool isEven(const float& input)
-{
-    const int truncated = (int)input;
-    return (truncated & 1) == 0;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// fxpCeil
-//---------------------------------------------------------------------------------------------------------------------------------
-// Rounds a fixed-point value up to the next whole number; values with no fraction bits set are
-// returned unchanged.
-FXP fxpCeil(const FXP& input)
-{
-    if( !( input & FXP_FRACTION_MASK ) )
-    {
-        return input; // already integral
-    }
-    // drop the fraction, then step up by one
-    return (input & FXP_INTEGER_MASK) + FXP_ONE;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// fxpFloor
-//---------------------------------------------------------------------------------------------------------------------------------
-// Rounds a fixed-point value down by keeping only the bits selected by FXP_INTEGER_MASK.
-FXP fxpFloor(const FXP& input)
-{
-    const FXP wholeBits = (input & FXP_INTEGER_MASK);
-    return wholeBits;
-}
-
-//=================================================================================================================================
-// CHWTessellator
-//=================================================================================================================================
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::CHWTessellator: starts with null point/index buffers, zero counts and index patching switched off.
-//---------------------------------------------------------------------------------------------------------------------------------
-CHWTessellator::CHWTessellator()
-{
- m_Point = 0; // no storage yet; Init() allocates it on first call
- m_Index = 0; // no storage yet; Init() allocates it on first call
- m_NumPoints = 0;
- m_NumIndices = 0;
- m_bUsingPatchedIndices = false;
- m_bUsingPatchedIndices2 = false;
-#ifdef ALLOW_XBOX_360_COMPARISON
- m_bXBox360Mode = false;
-#endif
-}
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::~CHWTessellator: releases the point and index arrays that Init() allocated with new[].
-//---------------------------------------------------------------------------------------------------------------------------------
-CHWTessellator::~CHWTessellator()
-{
- delete [] m_Point; // still null if Init() was never called; delete[] on null is a no-op
- delete [] m_Index;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::Init: records the partitioning mode and output primitive, allocating the point/index buffers on first call.
-// User calls this. It also resets the stored point and index counts to zero.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::Init(
- D3D11_TESSELLATOR_PARTITIONING partitioning,
- D3D11_TESSELLATOR_OUTPUT_PRIMITIVE outputPrimitive)
-{
- if( 0 == m_Point ) // allocate only once; later calls reuse the buffer
- {
- m_Point = new DOMAIN_POINT[MAX_POINT_COUNT];
- }
- if( 0 == m_Index ) // allocate only once; later calls reuse the buffer
- {
- m_Index = new int[MAX_INDEX_COUNT];
- }
- m_partitioning = partitioning;
- m_originalPartitioning = partitioning;
- switch( partitioning )
- {
- case D3D11_TESSELLATOR_PARTITIONING_INTEGER:
- default: // m_parity is left untouched for integer and any unlisted mode
- break;
- case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
- m_parity = TESSELLATOR_PARITY_ODD;
- break;
- case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
- m_parity = TESSELLATOR_PARITY_EVEN;
- break;
- }
- m_originalParity = m_parity; // NOTE(review): in the INTEGER/default case this copies whatever m_parity already held - confirm it is initialized elsewhere
- m_outputPrimitive = outputPrimitive;
- m_NumPoints = 0;
- m_NumIndices = 0;
-}
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::TessellateQuadDomain: tessellates a quad patch from four edge TessFactors and two inside TessFactors.
-// User calls this. A culled patch leaves m_NumPoints and m_NumIndices at zero.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::TessellateQuadDomain( float tessFactor_Ueq0, float tessFactor_Veq0, float tessFactor_Ueq1, float tessFactor_Veq1,
- float insideTessFactor_U, float insideTessFactor_V )
-{
- PROCESSED_TESS_FACTORS_QUAD processedTessFactors;
- QuadProcessTessFactors(tessFactor_Ueq0,tessFactor_Veq0,tessFactor_Ueq1,tessFactor_Veq1,insideTessFactor_U,insideTessFactor_V,processedTessFactors);
-
- if( processedTessFactors.bPatchCulled ) // nothing to output for a culled patch
- {
- m_NumPoints = 0;
- m_NumIndices = 0;
- return;
- }
- else if( processedTessFactors.bJustDoMinimumTessFactor ) // all TessFactors are 1: output only the four corner points
- {
- DefinePoint(/*U*/0,/*V*/0,/*pointStorageOffset*/0);
- DefinePoint(/*U*/FXP_ONE,/*V*/0,/*pointStorageOffset*/1);
- DefinePoint(/*U*/FXP_ONE,/*V*/FXP_ONE,/*pointStorageOffset*/2);
- DefinePoint(/*U*/0,/*V*/FXP_ONE,/*pointStorageOffset*/3);
- m_NumPoints = 4;
-
- switch(m_outputPrimitive)
- {
- case D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CW:
- case D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CCW:
- // DefineClockwiseTriangle orients them CCW if needed
- DefineClockwiseTriangle(0,1,3,/*indexStorageOffset*/0);
- DefineClockwiseTriangle(1,2,3,/*indexStorageOffset*/3);
- m_NumIndices = 6; // two triangles, three indices each
- break;
- case D3D11_TESSELLATOR_OUTPUT_POINT:
- DumpAllPoints();
- break;
- case D3D11_TESSELLATOR_OUTPUT_LINE:
- DumpAllPointsAsInOrderLineList();
- break;
- }
- return;
- }
-
- QuadGeneratePoints(processedTessFactors);
-
- if( m_outputPrimitive == D3D11_TESSELLATOR_OUTPUT_POINT ) // point output skips connectivity generation
- {
- DumpAllPoints();
- return;
- }
- if( m_outputPrimitive == D3D11_TESSELLATOR_OUTPUT_LINE ) // line output skips connectivity generation
- {
- DumpAllPointsAsInOrderLineList();
- return;
- }
-
- QuadGenerateConnectivity(processedTessFactors); // can be done in parallel to QuadGeneratePoints()
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::QuadProcessTessFactors: culls/clamps/rounds the quad TessFactors, converts them to fixed point and fills processedTessFactors.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::QuadProcessTessFactors( float tessFactor_Ueq0, float tessFactor_Veq0, float tessFactor_Ueq1, float tessFactor_Veq1,
- float insideTessFactor_U, float insideTessFactor_V, PROCESSED_TESS_FACTORS_QUAD& processedTessFactors )
-{
- // Is the patch culled?
- if( !(tessFactor_Ueq0 > 0) || // !(x > 0) is also true for NaN, so a NaN edge TessFactor culls the patch
- !(tessFactor_Veq0 > 0) ||
- !(tessFactor_Ueq1 > 0) ||
- !(tessFactor_Veq1 > 0) )
- {
- processedTessFactors.bPatchCulled = true;
- return; // no other field of processedTessFactors is written on this path
- }
- else
- {
- processedTessFactors.bPatchCulled = false;
- }
-
- // Clamp edge TessFactors
- float lowerBound = 0.0, upperBound = 0.0;
- switch(m_originalPartitioning)
- {
- case D3D11_TESSELLATOR_PARTITIONING_INTEGER:
- case D3D11_TESSELLATOR_PARTITIONING_POW2: // don't care about pow2 distinction for validation, just treat as integer
- lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
- upperBound = D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
- break;
-
- case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
- lowerBound = D3D11_TESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR;
- upperBound = D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
- break;
-
- case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
- lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
- upperBound = D3D11_TESSELLATOR_MAX_ODD_TESSELLATION_FACTOR;
- break;
- }
-
- tessFactor_Ueq0 = tess_fmin( upperBound, tess_fmax( lowerBound, tessFactor_Ueq0 ) );
- tessFactor_Veq0 = tess_fmin( upperBound, tess_fmax( lowerBound, tessFactor_Veq0 ) );
- tessFactor_Ueq1 = tess_fmin( upperBound, tess_fmax( lowerBound, tessFactor_Ueq1 ) );
- tessFactor_Veq1 = tess_fmin( upperBound, tess_fmax( lowerBound, tessFactor_Veq1 ) );
-
- if( HWIntegerPartitioning()) // pow2 or integer, round to next int (hw doesn't care about pow2 distinction)
- {
- tessFactor_Ueq0 = ceil(tessFactor_Ueq0);
- tessFactor_Veq0 = ceil(tessFactor_Veq0);
- tessFactor_Ueq1 = ceil(tessFactor_Ueq1);
- tessFactor_Veq1 = ceil(tessFactor_Veq1);
- }
-
- // Clamp inside TessFactors
- if(D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD == m_originalPartitioning)
- {
-#define EPSILON 0.0000152587890625f // 2^(-16), min positive fixed point fraction
-#define MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON (D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR + EPSILON/2)
- // If any TessFactor will end up > 1 after floatToFixed conversion later,
- // then force the inside TessFactors to be > 1 so there is a picture frame.
- if( (tessFactor_Ueq0 > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) ||
- (tessFactor_Veq0 > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) ||
- (tessFactor_Ueq1 > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) ||
- (tessFactor_Veq1 > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) ||
- (insideTessFactor_U > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) ||
- (insideTessFactor_V > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) )
- {
- // Force picture frame by raising the lower bound used for the inside clamp below
- lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR + EPSILON;
- }
- }
-
- insideTessFactor_U = tess_fmin( upperBound, tess_fmax( lowerBound, insideTessFactor_U ) );
- insideTessFactor_V = tess_fmin( upperBound, tess_fmax( lowerBound, insideTessFactor_V ) );
- // Note the above clamps map NaN to lowerBound
-
-
- if( HWIntegerPartitioning()) // pow2 or integer, round to next int (hw doesn't care about pow2 distinction)
- {
- insideTessFactor_U = ceil(insideTessFactor_U);
- insideTessFactor_V = ceil(insideTessFactor_V);
- }
-
- // Reset our vertex and index buffers. We have enough storage for the max tessFactor.
- m_NumPoints = 0;
- m_NumIndices = 0;
-
- // Process tessFactors
- float outsideTessFactor[QUAD_EDGES] = {tessFactor_Ueq0, tessFactor_Veq0, tessFactor_Ueq1, tessFactor_Veq1};
- float insideTessFactor[QUAD_AXES] = {insideTessFactor_U,insideTessFactor_V};
- int edge, axis;
- if( HWIntegerPartitioning() ) // integer modes: parity is derived from each (already rounded) TessFactor
- {
- for( edge = 0; edge < QUAD_EDGES; edge++ )
- {
- int edgeEven = isEven(outsideTessFactor[edge]);
- processedTessFactors.outsideTessFactorParity[edge] = edgeEven ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
- }
- for( axis = 0; axis < QUAD_AXES; axis++ )
- {
- processedTessFactors.insideTessFactorParity[axis] = // an inside TessFactor equal to FLOAT_ONE is treated as even
- (isEven(insideTessFactor[axis]) || (FLOAT_ONE == insideTessFactor[axis]) )
- ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
- }
- }
- else // fractional modes: every TessFactor uses the parity recorded by Init()
- {
- for( edge = 0; edge < QUAD_EDGES; edge++ )
- {
- processedTessFactors.outsideTessFactorParity[edge] = m_originalParity;
- }
- processedTessFactors.insideTessFactorParity[U] = processedTessFactors.insideTessFactorParity[V] = m_originalParity;
- }
-
- // Save fixed point TessFactors
- for( edge = 0; edge < QUAD_EDGES; edge++ )
- {
- processedTessFactors.outsideTessFactor[edge] = floatToFixed(outsideTessFactor[edge]);
- }
- for( axis = 0; axis < QUAD_AXES; axis++ )
- {
- processedTessFactors.insideTessFactor[axis] = floatToFixed(insideTessFactor[axis]);
- }
-
- if( HWIntegerPartitioning() || Odd() )
- {
- // Special case if all TessFactors are 1
- if( (FXP_ONE == processedTessFactors.insideTessFactor[U]) &&
- (FXP_ONE == processedTessFactors.insideTessFactor[V]) &&
- (FXP_ONE == processedTessFactors.outsideTessFactor[Ueq0]) &&
- (FXP_ONE == processedTessFactors.outsideTessFactor[Veq0]) &&
- (FXP_ONE == processedTessFactors.outsideTessFactor[Ueq1]) &&
- (FXP_ONE == processedTessFactors.outsideTessFactor[Veq1]) )
- {
- processedTessFactors.bJustDoMinimumTessFactor = true;
- return; // contexts and point counts below are not computed on this path
- }
- }
- processedTessFactors.bJustDoMinimumTessFactor = false;
-
- // Compute TessFactor-specific metadata
- for(int edge = 0; edge < QUAD_EDGES; edge++ ) // this loop-local 'edge' shadows the function-scope one declared above
- {
- SetTessellationParity(processedTessFactors.outsideTessFactorParity[edge]);
- ComputeTessFactorContext(processedTessFactors.outsideTessFactor[edge], processedTessFactors.outsideTessFactorCtx[edge]);
- }
-
- for(int axis = 0; axis < QUAD_AXES; axis++) // this loop-local 'axis' shadows the function-scope one declared above
- {
- SetTessellationParity(processedTessFactors.insideTessFactorParity[axis]);
- ComputeTessFactorContext(processedTessFactors.insideTessFactor[axis], processedTessFactors.insideTessFactorCtx[axis]);
- }
-
- // Compute some initial data.
-
- // outside edge offsets and storage
- for(int edge = 0; edge < QUAD_EDGES; edge++ )
- {
- SetTessellationParity(processedTessFactors.outsideTessFactorParity[edge]);
- processedTessFactors.numPointsForOutsideEdge[edge] = NumPointsForTessFactor(processedTessFactors.outsideTessFactor[edge]);
- m_NumPoints += processedTessFactors.numPointsForOutsideEdge[edge];
- }
- m_NumPoints -= 4; // adjacent edges share a corner point, so remove the four that were counted twice
-
- // inside edge offsets
- for(int axis = 0; axis < QUAD_AXES; axis++)
- {
- SetTessellationParity(processedTessFactors.insideTessFactorParity[axis]);
- processedTessFactors.numPointsForInsideTessFactor[axis] = NumPointsForTessFactor(processedTessFactors.insideTessFactor[axis]);
- int pointCountMin = ( TESSELLATOR_PARITY_ODD == processedTessFactors.insideTessFactorParity[axis] ) ? 4 : 3;
- // max() allows degenerate transition regions when inside TessFactor == 1
- processedTessFactors.numPointsForInsideTessFactor[axis] = max(pointCountMin,processedTessFactors.numPointsForInsideTessFactor[axis]);
- }
-
- processedTessFactors.insideEdgePointBaseOffset = m_NumPoints; // interior points are stored right after the outer-ring points
-
- // inside storage, including interior edges above
- int numInteriorPoints = (processedTessFactors.numPointsForInsideTessFactor[U] - 2)*(processedTessFactors.numPointsForInsideTessFactor[V]-2);
- m_NumPoints += numInteriorPoints;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::QuadGeneratePoints: defines the outer-ring points, then each interior ring, then the degenerate center row if any.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::QuadGeneratePoints( const PROCESSED_TESS_FACTORS_QUAD& processedTessFactors )
-{
- // Generate exterior ring edge points, clockwise from top-left
- int pointOffset = 0; // running storage offset, advanced once per defined point across all loops below
- int edge;
- for(edge = 0; edge < QUAD_EDGES; edge++ )
- {
- int parity = edge&0x1; // odd edges (1,3) vary U, even edges (0,2) vary V - see the DefinePoint calls below
- int startPoint = 0;
- int endPoint = processedTessFactors.numPointsForOutsideEdge[edge] - 1;
- for(int p = startPoint; p < endPoint; p++,pointOffset++) // don't include end, since next edge starts with it.
- {
- FXP fxpParam;
- int q = ((edge==1)||(edge==2)) ? p : endPoint - p; // edges 0 and 3 are walked in reverse order
- SetTessellationParity(processedTessFactors.outsideTessFactorParity[edge]);
- PlacePointIn1D(processedTessFactors.outsideTessFactorCtx[edge],q,fxpParam);
- if( parity )
- {
- DefinePoint(/*U*/fxpParam,
- /*V*/(edge == 3) ? FXP_ONE : 0,
- /*pointStorageOffset*/pointOffset);
- }
- else
- {
- DefinePoint(/*U*/(edge == 2) ? FXP_ONE : 0,
- /*V*/fxpParam,
- /*pointStorageOffset*/pointOffset);
- }
- }
- }
-
- // Generate interior ring points, clockwise from (U==0,V==1) (bottom-left) spiralling toward center
- static const int startRing = 1;
- int minNumPointsForTessFactor = min(processedTessFactors.numPointsForInsideTessFactor[U],processedTessFactors.numPointsForInsideTessFactor[V]);
- int numRings = (minNumPointsForTessFactor >> 1); // note for even tess we aren't counting center point here.
- for(int ring = startRing; ring < numRings; ring++)
- {
- int startPoint = ring;
- int endPoint[QUAD_AXES] = {processedTessFactors.numPointsForInsideTessFactor[U] - 1 - startPoint,
- processedTessFactors.numPointsForInsideTessFactor[V] - 1 - startPoint};
-
- for(edge = 0; edge < QUAD_EDGES; edge++ )
- {
- int parity[QUAD_AXES] = {edge&0x1,((edge+1)&0x1)}; // parity[0] indexes the axis held fixed on this edge, parity[1] the axis being walked
- int perpendicularAxisPoint = (edge < 2) ? startPoint : endPoint[parity[0]];
- FXP fxpPerpParam;
- SetTessellationParity(processedTessFactors.insideTessFactorParity[parity[0]]);
- PlacePointIn1D(processedTessFactors.insideTessFactorCtx[parity[0]],perpendicularAxisPoint,fxpPerpParam);
- SetTessellationParity(processedTessFactors.insideTessFactorParity[parity[1]]);
- for(int p = startPoint; p < endPoint[parity[1]]; p++, pointOffset++) // don't include end: next edge starts with it.
- {
- FXP fxpParam;
- int q = ((edge == 1)||(edge==2)) ? p : endPoint[parity[1]] - (p - startPoint); // edges 0 and 3 are walked in reverse order
- PlacePointIn1D(processedTessFactors.insideTessFactorCtx[parity[1]],q,fxpParam);
- if( parity[1] )
- {
- DefinePoint(/*U*/fxpPerpParam,
- /*V*/fxpParam,
- /*pointStorageOffset*/pointOffset);
- }
- else
- {
- DefinePoint(/*U*/fxpParam,
- /*V*/fxpPerpParam,
- /*pointStorageOffset*/pointOffset);
- }
- }
- }
- }
- // For even tessellation, the inner "ring" is degenerate - a row of points
- if( (processedTessFactors.numPointsForInsideTessFactor[U] > processedTessFactors.numPointsForInsideTessFactor[V]) &&
- (TESSELLATOR_PARITY_EVEN == processedTessFactors.insideTessFactorParity[V]) )
- {
- int startPoint = numRings;
- int endPoint = processedTessFactors.numPointsForInsideTessFactor[U] - 1 - startPoint;
- SetTessellationParity(processedTessFactors.insideTessFactorParity[U]);
- for( int p = startPoint; p <= endPoint; p++, pointOffset++ ) // inclusive of endPoint, with U increasing
- {
- FXP fxpParam;
- PlacePointIn1D(processedTessFactors.insideTessFactorCtx[U],p,fxpParam);
- DefinePoint(/*U*/fxpParam,
- /*V*/FXP_ONE_HALF, // middle
- /*pointStorageOffset*/pointOffset);
- }
- }
- else if( (processedTessFactors.numPointsForInsideTessFactor[V] >= processedTessFactors.numPointsForInsideTessFactor[U]) &&
- (TESSELLATOR_PARITY_EVEN == processedTessFactors.insideTessFactorParity[U]) )
- {
- int startPoint = numRings;
- int endPoint;
- FXP fxpParam;
- endPoint = processedTessFactors.numPointsForInsideTessFactor[V] - 1 - startPoint;
- SetTessellationParity(processedTessFactors.insideTessFactorParity[V]);
- for( int p = endPoint; p >= startPoint; p--, pointOffset++ ) // inclusive of both ends, walked from endPoint down to startPoint
- {
- PlacePointIn1D(processedTessFactors.insideTessFactorCtx[V],p,fxpParam);
- DefinePoint(/*U*/FXP_ONE_HALF, // middle
- /*V*/fxpParam,
- /*pointStorageOffset*/pointOffset);
- }
- }
-}
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::QuadGenerateConnectivity: stitches triangles ring by ring (four edges per ring), then triangulates the center strip.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::QuadGenerateConnectivity( const PROCESSED_TESS_FACTORS_QUAD& processedTessFactors )
-{
- // Generate primitives for all the concentric rings, one side at a time for each ring
- static const int startRing = 1;
- int numPointRowsToCenter[QUAD_AXES] = {((processedTessFactors.numPointsForInsideTessFactor[U]+1) >> 1),
- ((processedTessFactors.numPointsForInsideTessFactor[V]+1) >> 1)}; // +1 is so even tess includes the center point
- int numRings = min(numPointRowsToCenter[U],numPointRowsToCenter[V]);
- int degeneratePointRing[QUAD_AXES] = { // Even partitioning causes degenerate row of points,
- // which results in exceptions to the point ordering conventions
- // when travelling around the rings counterclockwise.
- (TESSELLATOR_PARITY_EVEN == processedTessFactors.insideTessFactorParity[V]) ? numPointRowsToCenter[V] - 1 : -1,
- (TESSELLATOR_PARITY_EVEN == processedTessFactors.insideTessFactorParity[U]) ? numPointRowsToCenter[U] - 1 : -1 };
-
- const TESS_FACTOR_CONTEXT* outsideTessFactorCtx[QUAD_EDGES] = {&processedTessFactors.outsideTessFactorCtx[Ueq0],
- &processedTessFactors.outsideTessFactorCtx[Veq0],
- &processedTessFactors.outsideTessFactorCtx[Ueq1],
- &processedTessFactors.outsideTessFactorCtx[Veq1]};
- TESSELLATOR_PARITY outsideTessFactorParity[QUAD_EDGES] = {processedTessFactors.outsideTessFactorParity[Ueq0],
- processedTessFactors.outsideTessFactorParity[Veq0],
- processedTessFactors.outsideTessFactorParity[Ueq1],
- processedTessFactors.outsideTessFactorParity[Veq1]};
- int numPointsForOutsideEdge[QUAD_EDGES] = {processedTessFactors.numPointsForOutsideEdge[Ueq0],
- processedTessFactors.numPointsForOutsideEdge[Veq0],
- processedTessFactors.numPointsForOutsideEdge[Ueq1],
- processedTessFactors.numPointsForOutsideEdge[Veq1]};
-
- int insideEdgePointBaseOffset = processedTessFactors.insideEdgePointBaseOffset;
- int outsideEdgePointBaseOffset = 0;
- int edge;
- for(int ring = startRing; ring < numRings; ring++)
- {
- int numPointsForInsideEdge[QUAD_AXES] = {processedTessFactors.numPointsForInsideTessFactor[U] - 2*ring,
- processedTessFactors.numPointsForInsideTessFactor[V] - 2*ring};
-
- int edge0InsidePointBaseOffset = insideEdgePointBaseOffset; // remembered so edge 3 can wrap back to this ring's first inside point
- int edge0OutsidePointBaseOffset = outsideEdgePointBaseOffset; // remembered so edge 3 can wrap back to this ring's first outside point
-
- for(edge = 0; edge < QUAD_EDGES; edge++ )
- {
- int parity = (edge+1)&0x1;
-
- int numTriangles = numPointsForInsideEdge[parity] + numPointsForOutsideEdge[edge] - 2;
- int insideBaseOffset;
- int outsideBaseOffset;
- if( edge == 3 ) // We need to patch the indexing so Stitch() can think it sees
- // 2 sequentially increasing rows of points, even though we have wrapped around
- // to the end of the inner and outer ring's points, so the last point is really
- // the first point for the ring.
- // We make it so that when Stitch() calls AddIndex(), that function
- // will do any necessary index adjustment.
- {
- if( ring == degeneratePointRing[parity] )
- {
- m_IndexPatchContext2.baseIndexToInvert = insideEdgePointBaseOffset + 1;
- m_IndexPatchContext2.cornerCaseBadValue = outsideEdgePointBaseOffset + numPointsForOutsideEdge[edge] - 1;
- m_IndexPatchContext2.cornerCaseReplacementValue = edge0OutsidePointBaseOffset;
- m_IndexPatchContext2.indexInversionEndPoint = (m_IndexPatchContext2.baseIndexToInvert << 1) - 1;
- insideBaseOffset = m_IndexPatchContext2.baseIndexToInvert;
- outsideBaseOffset = outsideEdgePointBaseOffset;
- SetUsingPatchedIndices2(true);
- }
- else
- {
- m_IndexPatchContext.insidePointIndexDeltaToRealValue = insideEdgePointBaseOffset;
- m_IndexPatchContext.insidePointIndexBadValue = numPointsForInsideEdge[parity] - 1;
- m_IndexPatchContext.insidePointIndexReplacementValue = edge0InsidePointBaseOffset;
- m_IndexPatchContext.outsidePointIndexPatchBase = m_IndexPatchContext.insidePointIndexBadValue+1; // past inside patched index range
- m_IndexPatchContext.outsidePointIndexDeltaToRealValue = outsideEdgePointBaseOffset
- - m_IndexPatchContext.outsidePointIndexPatchBase;
- m_IndexPatchContext.outsidePointIndexBadValue = m_IndexPatchContext.outsidePointIndexPatchBase
- + numPointsForOutsideEdge[edge] - 1;
- m_IndexPatchContext.outsidePointIndexReplacementValue = edge0OutsidePointBaseOffset;
-
- insideBaseOffset = 0;
- outsideBaseOffset = m_IndexPatchContext.outsidePointIndexPatchBase;
- SetUsingPatchedIndices(true);
- }
- }
- else if( (edge == 2) && (ring == degeneratePointRing[parity]) )
- {
- m_IndexPatchContext2.baseIndexToInvert = insideEdgePointBaseOffset;
- m_IndexPatchContext2.cornerCaseBadValue = -1; // unused
- m_IndexPatchContext2.cornerCaseReplacementValue = -1; // unused
- m_IndexPatchContext2.indexInversionEndPoint = m_IndexPatchContext2.baseIndexToInvert << 1;
- insideBaseOffset = m_IndexPatchContext2.baseIndexToInvert;
- outsideBaseOffset = outsideEdgePointBaseOffset;
- SetUsingPatchedIndices2(true);
- }
- else // ordinary edge: indices are used as-is, no patching
- {
- insideBaseOffset = insideEdgePointBaseOffset;
- outsideBaseOffset = outsideEdgePointBaseOffset;
- }
- if( ring == startRing ) // first ring joins the outside edge TessFactors to the inside ones
- {
- StitchTransition(/*baseIndexOffset: */m_NumIndices,
- insideBaseOffset,processedTessFactors.insideTessFactorCtx[parity].numHalfTessFactorPoints,processedTessFactors.insideTessFactorParity[parity],
- outsideBaseOffset,outsideTessFactorCtx[edge]->numHalfTessFactorPoints,outsideTessFactorParity[edge]);
- }
- else
- {
- StitchRegular(/*bTrapezoid*/true, DIAGONALS_MIRRORED,
- /*baseIndexOffset: */m_NumIndices,
- numPointsForInsideEdge[parity],
- insideBaseOffset,outsideBaseOffset);
- }
- SetUsingPatchedIndices(false); // patching is switched off again after every edge
- SetUsingPatchedIndices2(false);
- m_NumIndices += numTriangles*3; // three indices per triangle
- outsideEdgePointBaseOffset += numPointsForOutsideEdge[edge] - 1;
- if( (edge == 2) && (ring == degeneratePointRing[parity]) )
- {
- insideEdgePointBaseOffset -= numPointsForInsideEdge[parity] - 1;
- }
- else
- {
- insideEdgePointBaseOffset += numPointsForInsideEdge[parity] - 1;
- }
- numPointsForOutsideEdge[edge] = numPointsForInsideEdge[parity]; // this ring's inside edge is the next ring's outside edge
- }
- if( startRing == ring ) // after the first ring, "outside" contexts and parities come from the inside TessFactors
- {
- for(edge = 0; edge < QUAD_EDGES; edge++ )
- {
- outsideTessFactorCtx[edge] = &processedTessFactors.insideTessFactorCtx[edge&1];
- outsideTessFactorParity[edge] = processedTessFactors.insideTessFactorParity[edge&1];
- }
- }
- }
-
- // Triangulate center - a row of quads if odd
- // This triangulation may be producing diagonals that are asymmetric about
- // the center of the patch in this region.
- if( (processedTessFactors.numPointsForInsideTessFactor[U] > processedTessFactors.numPointsForInsideTessFactor[V]) &&
- (TESSELLATOR_PARITY_ODD == processedTessFactors.insideTessFactorParity[V] ) )
- {
- SetUsingPatchedIndices2(true);
- int stripNumQuads = (((processedTessFactors.numPointsForInsideTessFactor[U]>>1) - (processedTessFactors.numPointsForInsideTessFactor[V]>>1))<<1)+
- ((TESSELLATOR_PARITY_EVEN == processedTessFactors.insideTessFactorParity[U] ) ? 2 : 1);
- m_IndexPatchContext2.baseIndexToInvert = outsideEdgePointBaseOffset + stripNumQuads + 2;
- m_IndexPatchContext2.cornerCaseBadValue = m_IndexPatchContext2.baseIndexToInvert;
- m_IndexPatchContext2.cornerCaseReplacementValue = outsideEdgePointBaseOffset;
- m_IndexPatchContext2.indexInversionEndPoint = m_IndexPatchContext2.baseIndexToInvert +
- m_IndexPatchContext2.baseIndexToInvert + stripNumQuads;
- StitchRegular(/*bTrapezoid*/false,DIAGONALS_INSIDE_TO_OUTSIDE,
- /*baseIndexOffset: */m_NumIndices, /*numInsideEdgePoints:*/stripNumQuads+1,
- /*insideEdgePointBaseOffset*/m_IndexPatchContext2.baseIndexToInvert,
- outsideEdgePointBaseOffset+1);
- SetUsingPatchedIndices2(false);
- m_NumIndices += stripNumQuads*6; // two triangles (six indices) per quad
- }
- else if((processedTessFactors.numPointsForInsideTessFactor[V] >= processedTessFactors.numPointsForInsideTessFactor[U]) &&
- (TESSELLATOR_PARITY_ODD == processedTessFactors.insideTessFactorParity[U]) )
- {
- SetUsingPatchedIndices2(true);
- int stripNumQuads = (((processedTessFactors.numPointsForInsideTessFactor[V]>>1) - (processedTessFactors.numPointsForInsideTessFactor[U]>>1))<<1)+
- ((TESSELLATOR_PARITY_EVEN == processedTessFactors.insideTessFactorParity[V] ) ? 2 : 1);
- m_IndexPatchContext2.baseIndexToInvert = outsideEdgePointBaseOffset + stripNumQuads + 1;
- m_IndexPatchContext2.cornerCaseBadValue = -1; // unused (cornerCaseReplacementValue is not assigned in this branch)
- m_IndexPatchContext2.indexInversionEndPoint = m_IndexPatchContext2.baseIndexToInvert +
- m_IndexPatchContext2.baseIndexToInvert + stripNumQuads;
- DIAGONALS diag = (TESSELLATOR_PARITY_EVEN == processedTessFactors.insideTessFactorParity[V]) ?
- DIAGONALS_INSIDE_TO_OUTSIDE : DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE;
- StitchRegular(/*bTrapezoid*/false,diag,
- /*baseIndexOffset: */m_NumIndices, /*numInsideEdgePoints:*/stripNumQuads+1,
- /*insideEdgePointBaseOffset*/m_IndexPatchContext2.baseIndexToInvert,
- outsideEdgePointBaseOffset);
- SetUsingPatchedIndices2(false);
- m_NumIndices += stripNumQuads*6; // two triangles (six indices) per quad
- }
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::TessellateTriDomain: tessellates a triangle patch from three edge TessFactors and one inside TessFactor.
-// User calls this. A culled patch leaves m_NumPoints and m_NumIndices at zero.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::TessellateTriDomain( float tessFactor_Ueq0, float tessFactor_Veq0, float tessFactor_Weq0,
- float insideTessFactor )
-{
- PROCESSED_TESS_FACTORS_TRI processedTessFactors;
- TriProcessTessFactors(tessFactor_Ueq0,tessFactor_Veq0,tessFactor_Weq0,insideTessFactor,processedTessFactors);
-
- if( processedTessFactors.bPatchCulled ) // nothing to output for a culled patch
- {
- m_NumPoints = 0;
- m_NumIndices = 0;
- return;
- }
- else if( processedTessFactors.bJustDoMinimumTessFactor ) // all TessFactors are 1: output only the three corner points
- {
- DefinePoint(/*U*/0,/*V*/FXP_ONE,/*pointStorageOffset*/0); //V=1 (beginning of Ueq0 edge VW)
- DefinePoint(/*U*/0,/*V*/0,/*pointStorageOffset*/1); //W=1 (beginning of Veq0 edge WU)
- DefinePoint(/*U*/FXP_ONE,/*V*/0,/*pointStorageOffset*/2); //U=1 (beginning of Weq0 edge UV)
- m_NumPoints = 3;
-
- switch(m_outputPrimitive)
- {
- case D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CW:
- case D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CCW:
- // DefineClockwiseTriangle orients them CCW if needed
- DefineClockwiseTriangle(0,1,2,/*indexStorageBaseOffset*/m_NumIndices); // m_NumIndices was reset to 0 by TriProcessTessFactors
- m_NumIndices = 3;
- break;
- case D3D11_TESSELLATOR_OUTPUT_POINT:
- DumpAllPoints();
- break;
- case D3D11_TESSELLATOR_OUTPUT_LINE:
- DumpAllPointsAsInOrderLineList();
- break;
- }
- return;
- }
-
- TriGeneratePoints(processedTessFactors);
-
- if( m_outputPrimitive == D3D11_TESSELLATOR_OUTPUT_POINT ) // point output skips connectivity generation
- {
- DumpAllPoints();
- return;
- }
- if( m_outputPrimitive == D3D11_TESSELLATOR_OUTPUT_LINE ) // line output skips connectivity generation
- {
- DumpAllPointsAsInOrderLineList();
- return;
- }
-
- TriGenerateConnectivity(processedTessFactors); // can be done in parallel to TriGeneratePoints()
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::TriProcessTessFactors: culls/clamps/rounds the triangle TessFactors, converts them to fixed point and fills processedTessFactors.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::TriProcessTessFactors( float tessFactor_Ueq0, float tessFactor_Veq0, float tessFactor_Weq0,
- float insideTessFactor, PROCESSED_TESS_FACTORS_TRI& processedTessFactors )
-{
- // Is the patch culled?
- if( !(tessFactor_Ueq0 > 0) || // !(x > 0) is also true for NaN, so a NaN edge TessFactor culls the patch
- !(tessFactor_Veq0 > 0) ||
- !(tessFactor_Weq0 > 0) )
- {
- processedTessFactors.bPatchCulled = true;
- return; // no other field of processedTessFactors is written on this path
- }
- else
- {
- processedTessFactors.bPatchCulled = false;
- }
-
- // Clamp edge TessFactors
- float lowerBound = 0.0, upperBound = 0.0;
- switch(m_originalPartitioning)
- {
- case D3D11_TESSELLATOR_PARTITIONING_INTEGER:
- case D3D11_TESSELLATOR_PARTITIONING_POW2: // don't care about pow2 distinction for validation, just treat as integer
- lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
- upperBound = D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
- break;
-
- case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
- lowerBound = D3D11_TESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR;
- upperBound = D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
- break;
-
- case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
- lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
- upperBound = D3D11_TESSELLATOR_MAX_ODD_TESSELLATION_FACTOR;
- break;
- }
-
- tessFactor_Ueq0 = tess_fmin( upperBound, tess_fmax( lowerBound, tessFactor_Ueq0 ) );
- tessFactor_Veq0 = tess_fmin( upperBound, tess_fmax( lowerBound, tessFactor_Veq0 ) );
- tessFactor_Weq0 = tess_fmin( upperBound, tess_fmax( lowerBound, tessFactor_Weq0 ) );
-
- if( HWIntegerPartitioning()) // pow2 or integer, round to next int (hw doesn't care about pow2 distinction)
- {
- tessFactor_Ueq0 = ceil(tessFactor_Ueq0);
- tessFactor_Veq0 = ceil(tessFactor_Veq0);
- tessFactor_Weq0 = ceil(tessFactor_Weq0);
- }
-
- // Clamp inside TessFactors
- if(D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD == m_originalPartitioning)
- {
- if( (tessFactor_Ueq0 > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) || // this macro and EPSILON are #defined inside QuadProcessTessFactors earlier in the file
- (tessFactor_Veq0 > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON) ||
- (tessFactor_Weq0 > MIN_ODD_TESSFACTOR_PLUS_HALF_EPSILON))
- // Don't need the same check for insideTessFactor for tri patches,
- // since there is only one insideTessFactor, as opposed to quad
- // patches which have 2 insideTessFactors.
- {
- // Force picture frame by raising the lower bound used for the inside clamp below
- lowerBound = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR + EPSILON;
- }
- }
-
- insideTessFactor = tess_fmin( upperBound, tess_fmax( lowerBound, insideTessFactor ) );
- // Note the above clamps map NaN to lowerBound
-
- if( HWIntegerPartitioning()) // pow2 or integer, round to next int (hw doesn't care about pow2 distinction)
- {
- insideTessFactor = ceil(insideTessFactor);
- }
-
- // Reset our vertex and index buffers. We have enough storage for the max tessFactor.
- m_NumPoints = 0;
- m_NumIndices = 0;
-
- // Process tessFactors
- float outsideTessFactor[TRI_EDGES] = {tessFactor_Ueq0, tessFactor_Veq0, tessFactor_Weq0};
- int edge;
- if( HWIntegerPartitioning() ) // integer modes: parity is derived from each (already rounded) TessFactor
- {
- for( edge = 0; edge < TRI_EDGES; edge++ )
- {
- int edgeEven = isEven(outsideTessFactor[edge]);
- processedTessFactors.outsideTessFactorParity[edge] = edgeEven ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
- }
- processedTessFactors.insideTessFactorParity = (isEven(insideTessFactor) || (FLOAT_ONE == insideTessFactor)) // FLOAT_ONE is treated as even
- ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
- }
- else // fractional modes: every TessFactor uses the parity recorded by Init()
- {
- for( edge = 0; edge < TRI_EDGES; edge++ )
- {
- processedTessFactors.outsideTessFactorParity[edge] = m_originalParity;
- }
- processedTessFactors.insideTessFactorParity = m_originalParity;
- }
-
- // Save fixed point TessFactors
- for( edge = 0; edge < TRI_EDGES; edge++ )
- {
- processedTessFactors.outsideTessFactor[edge] = floatToFixed(outsideTessFactor[edge]);
- }
- processedTessFactors.insideTessFactor = floatToFixed(insideTessFactor);
-
- if( HWIntegerPartitioning() || Odd() )
- {
- // Special case if all TessFactors are 1
- if( (FXP_ONE == processedTessFactors.insideTessFactor) &&
- (FXP_ONE == processedTessFactors.outsideTessFactor[Ueq0]) &&
- (FXP_ONE == processedTessFactors.outsideTessFactor[Veq0]) &&
- (FXP_ONE == processedTessFactors.outsideTessFactor[Weq0]) )
- {
- processedTessFactors.bJustDoMinimumTessFactor = true;
- return; // contexts and point counts below are not computed on this path
- }
- }
- processedTessFactors.bJustDoMinimumTessFactor = false;
-
- // Compute per-TessFactor metadata
- for(edge = 0; edge < TRI_EDGES; edge++ )
- {
- SetTessellationParity(processedTessFactors.outsideTessFactorParity[edge]);
- ComputeTessFactorContext(processedTessFactors.outsideTessFactor[edge], processedTessFactors.outsideTessFactorCtx[edge]);
- }
- SetTessellationParity(processedTessFactors.insideTessFactorParity);
- ComputeTessFactorContext(processedTessFactors.insideTessFactor, processedTessFactors.insideTessFactorCtx);
-
- // Compute some initial data.
-
- // outside edge offsets and storage
- for(edge = 0; edge < TRI_EDGES; edge++ )
- {
- SetTessellationParity(processedTessFactors.outsideTessFactorParity[edge]);
- processedTessFactors.numPointsForOutsideEdge[edge] = NumPointsForTessFactor(processedTessFactors.outsideTessFactor[edge]);
- m_NumPoints += processedTessFactors.numPointsForOutsideEdge[edge];
- }
- m_NumPoints -= 3; // adjacent edges share a corner point, so remove the three that were counted twice
-
- // inside edge offsets
- SetTessellationParity(processedTessFactors.insideTessFactorParity); // Odd() below is evaluated after this parity has been set
- processedTessFactors.numPointsForInsideTessFactor = NumPointsForTessFactor(processedTessFactors.insideTessFactor);
- {
- int pointCountMin = Odd() ? 4 : 3;
- // max() allows degenerate transition regions when inside TessFactor == 1
- processedTessFactors.numPointsForInsideTessFactor = max(pointCountMin,processedTessFactors.numPointsForInsideTessFactor);
- }
-
- processedTessFactors.insideEdgePointBaseOffset = m_NumPoints; // interior points are stored right after the outer-ring points
-
- // inside storage, including interior edges above
- {
- int numInteriorRings = (processedTessFactors.numPointsForInsideTessFactor >> 1) - 1;
- int numInteriorPoints;
- if( Odd() )
- {
- numInteriorPoints = TRI_EDGES*(numInteriorRings*(numInteriorRings+1) - numInteriorRings);
- }
- else
- {
- numInteriorPoints = TRI_EDGES*(numInteriorRings*(numInteriorRings+1)) + 1; // the extra 1 is presumably the single center point of even tessellation - confirm
- }
- m_NumPoints += numInteriorPoints;
- }
-
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::TriGeneratePoints - emits every (U,V) point of a tri-domain patch through DefinePoint(): the exterior ring first, then the interior rings, then the center point when !Odd().
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::TriGeneratePoints( const PROCESSED_TESS_FACTORS_TRI& processedTessFactors )
-{
- // Generate exterior ring edge points, clockwise starting from point V (VW, the U==0 edge)
- int pointOffset = 0; // next storage slot handed to DefinePoint(); keeps counting through the interior rings below
- int edge;
- for(edge = 0; edge < TRI_EDGES; edge++ )
- {
- int parity = edge&0x1; // nonzero only for edge 1, the one edge whose 1D points are not reversed
- int startPoint = 0;
- int endPoint = processedTessFactors.numPointsForOutsideEdge[edge] - 1; // index of the last 1D point on this edge
- for(int p = startPoint; p < endPoint; p++, pointOffset++) // don't include end, since next edge starts with it.
- {
- FXP fxpParam;
- int q = (parity) ? p : endPoint - p; // whether to reverse point order given we are defining V or U (W implicit):
- // edge0, VW, has V decreasing, so reverse 1D points below
- // edge1, WU, has U increasing, so don't reverse 1D points below
- // edge2, UV, has U decreasing, so reverse 1D points below
- SetTessellationParity(processedTessFactors.outsideTessFactorParity[edge]); // each outside edge carries its own parity
- PlacePointIn1D(processedTessFactors.outsideTessFactorCtx[edge],q,fxpParam);
- if( edge == 0 ) // edge0: U is pinned to 0 and the 1D location becomes V
- {
- DefinePoint(/*U*/0,
- /*V*/fxpParam,
- /*pointStorageOffset*/pointOffset);
- }
- else // edge1 is emitted as (U, 0), edge2 as (U, 1-U)
- {
- DefinePoint(/*U*/fxpParam,
- /*V*/(edge == 2) ? FXP_ONE - fxpParam : 0,
- /*pointStorageOffset*/pointOffset);
- }
- }
- }
-
- // Generate interior ring points, clockwise spiralling in
- SetTessellationParity(processedTessFactors.insideTessFactorParity); // every interior ring uses the inside tess factor's parity
- static const int startRing = 1;
- int numRings = (processedTessFactors.numPointsForInsideTessFactor >> 1);
- for(int ring = startRing; ring < numRings; ring++)
- {
- int startPoint = ring; // ring N skips N 1D points at each end of the inside tess factor
- int endPoint = processedTessFactors.numPointsForInsideTessFactor - 1 - startPoint;
-
- for(edge = 0; edge < TRI_EDGES; edge++ )
- {
- int parity = edge&0x1;
- int perpendicularAxisPoint = startPoint; // the ring number doubles as the 1D index used for the perpendicular offset
- FXP fxpPerpParam;
- PlacePointIn1D(processedTessFactors.insideTessFactorCtx,perpendicularAxisPoint,fxpPerpParam);
- fxpPerpParam *= FXP_TWO_THIRDS; // Map location to the right size in barycentric space.
- // I (amarp) can draw a picture to explain.
- // We know this fixed point math won't over/underflow
- fxpPerpParam = (fxpPerpParam+FXP_ONE_HALF/*round*/)>>FXP_FRACTION_BITS; // get back to n.16
- for(int p = startPoint; p < endPoint; p++, pointOffset++) // don't include end: next edge starts with it.
- {
- FXP fxpParam;
- int q = (parity) ? p : endPoint - (p - startPoint); // whether to reverse point given we are defining V or U (W implicit):
- // edge0, VW, has V decreasing, so reverse 1D points below
- // edge1, WU, has U increasing, so don't reverse 1D points below
- // edge2, UV, has U decreasing, so reverse 1D points below
- PlacePointIn1D(processedTessFactors.insideTessFactorCtx,q,fxpParam);
- // edge0 VW, has perpendicular parameter U constant
- // edge1 WU, has perpendicular parameter V constant
- // edge2 UV, has perpendicular parameter W constant
- const unsigned int deriv = 2; // reciprocal is the rate of change of edge-parallel parameters as they are pushed into the triangle
- switch(edge)
- {
- case 0:
- DefinePoint(/*U*/fxpPerpParam,
- /*V*/fxpParam - (fxpPerpParam+1/*round*/)/deriv, // we know this fixed point math won't over/underflow
- /*pointStorageOffset*/pointOffset);
- break;
- case 1:
- DefinePoint(/*U*/fxpParam - (fxpPerpParam+1/*round*/)/deriv,// we know this fixed point math won't over/underflow
- /*V*/fxpPerpParam,
- /*pointStorageOffset*/pointOffset);
- break;
- case 2:
- DefinePoint(/*U*/fxpParam - (fxpPerpParam+1/*round*/)/deriv,// we know this fixed point math won't over/underflow
- /*V*/FXP_ONE - (fxpParam - (fxpPerpParam+1/*round*/)/deriv) - fxpPerpParam,// we know this fixed point math won't over/underflow
- /*pointStorageOffset*/pointOffset);
- break;
- }
- }
- }
- }
- if( !Odd() ) // the center point is emitted only when the current parity is not odd
- {
- // Last point is the point at the center.
- DefinePoint(/*U*/FXP_ONE_THIRD,
- /*V*/FXP_ONE_THIRD,
- /*pointStorageOffset*/pointOffset);
- }
-}
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::TriGenerateConnectivity - emits the triangle indices of a tri-domain patch ring by ring (StitchTransition for the first ring, StitchRegular afterwards), growing m_NumIndices as it goes.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::TriGenerateConnectivity( const PROCESSED_TESS_FACTORS_TRI& processedTessFactors )
-{
- // Generate primitives for all the concentric rings, one side at a time for each ring
- static const int startRing = 1;
- int numRings = ((processedTessFactors.numPointsForInsideTessFactor+1) >> 1); // +1 is so even tess includes the center point, which we want to now
- const TESS_FACTOR_CONTEXT* outsideTessFactorCtx[TRI_EDGES] = {&processedTessFactors.outsideTessFactorCtx[Ueq0], // local per-edge copies: overwritten with the inside tess factor's values after the first ring
- &processedTessFactors.outsideTessFactorCtx[Veq0],
- &processedTessFactors.outsideTessFactorCtx[Weq0]};
- TESSELLATOR_PARITY outsideTessFactorParity[TRI_EDGES] = {processedTessFactors.outsideTessFactorParity[Ueq0],
- processedTessFactors.outsideTessFactorParity[Veq0],
- processedTessFactors.outsideTessFactorParity[Weq0]};
- int numPointsForOutsideEdge[TRI_EDGES] = {processedTessFactors.numPointsForOutsideEdge[Ueq0],
- processedTessFactors.numPointsForOutsideEdge[Veq0],
- processedTessFactors.numPointsForOutsideEdge[Weq0]};
-
- int insideEdgePointBaseOffset = processedTessFactors.insideEdgePointBaseOffset; // first point of the current ring's inside edge
- int outsideEdgePointBaseOffset = 0; // first point of the current ring's outside edge
- int edge;
- for(int ring = startRing; ring < numRings; ring++)
- {
- int numPointsForInsideEdge = processedTessFactors.numPointsForInsideTessFactor - 2*ring; // each ring inward has two fewer points per edge
- int edge0InsidePointBaseOffset = insideEdgePointBaseOffset; // remembered so edge 2 can be patched back to the ring's first points
- int edge0OutsidePointBaseOffset = outsideEdgePointBaseOffset;
- for(edge = 0; edge < TRI_EDGES; edge++ )
- {
- int numTriangles = numPointsForInsideEdge + numPointsForOutsideEdge[edge] - 2;
-
- int insideBaseOffset;
- int outsideBaseOffset;
- if( edge == 2 ) // NOTE(review): presumably PatchIndexValue() uses this context to wrap edge 2's final inside/outside index onto edge 0's first point - confirm against PatchIndexValue
- {
- m_IndexPatchContext.insidePointIndexDeltaToRealValue = insideEdgePointBaseOffset;
- m_IndexPatchContext.insidePointIndexBadValue = numPointsForInsideEdge - 1;
- m_IndexPatchContext.insidePointIndexReplacementValue = edge0InsidePointBaseOffset;
- m_IndexPatchContext.outsidePointIndexPatchBase = m_IndexPatchContext.insidePointIndexBadValue+1; // past inside patched index range
- m_IndexPatchContext.outsidePointIndexDeltaToRealValue = outsideEdgePointBaseOffset
- - m_IndexPatchContext.outsidePointIndexPatchBase;
- m_IndexPatchContext.outsidePointIndexBadValue = m_IndexPatchContext.outsidePointIndexPatchBase
- + numPointsForOutsideEdge[edge] - 1;
- m_IndexPatchContext.outsidePointIndexReplacementValue = edge0OutsidePointBaseOffset;
- SetUsingPatchedIndices(true);
- insideBaseOffset = 0; // with patching on, the stitchers are given offsets relative to the patch ranges set up above
- outsideBaseOffset = m_IndexPatchContext.outsidePointIndexPatchBase;
- }
- else
- {
- insideBaseOffset = insideEdgePointBaseOffset;
- outsideBaseOffset = outsideEdgePointBaseOffset;
- }
- if( ring == startRing ) // only the first ring joins the outside tess factors to the inside tess factor
- {
- StitchTransition(/*baseIndexOffset: */m_NumIndices,
- insideBaseOffset,processedTessFactors.insideTessFactorCtx.numHalfTessFactorPoints,processedTessFactors.insideTessFactorParity,
- outsideBaseOffset,outsideTessFactorCtx[edge]->numHalfTessFactorPoints,outsideTessFactorParity[edge]);
- }
- else
- {
- StitchRegular(/*bTrapezoid*/true, DIAGONALS_MIRRORED,
- /*baseIndexOffset: */m_NumIndices,
- numPointsForInsideEdge,
- insideBaseOffset,outsideBaseOffset);
- }
- if( 2 == edge )
- {
- SetUsingPatchedIndices(false); // patching is enabled only while edge 2 is being stitched
- }
- m_NumIndices += numTriangles*3; // account for the indices just written for this edge
- outsideEdgePointBaseOffset += numPointsForOutsideEdge[edge] - 1;
- insideEdgePointBaseOffset += numPointsForInsideEdge - 1;
- numPointsForOutsideEdge[edge] = numPointsForInsideEdge; // this ring's inside edge is the next ring's outside edge
- }
- if( startRing == ring )
- {
- for(edge = 0; edge < TRI_EDGES; edge++ )
- {
- outsideTessFactorCtx[edge] = &processedTessFactors.insideTessFactorCtx;
- outsideTessFactorParity[edge] = processedTessFactors.insideTessFactorParity;
- }
- }
- }
- if( Odd() )
- {
- // Triangulate center (a single triangle)
- DefineClockwiseTriangle(outsideEdgePointBaseOffset, outsideEdgePointBaseOffset+1, outsideEdgePointBaseOffset+2,
- m_NumIndices);
- m_NumIndices += 3;
- }
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::TessellateIsoLineDomain
-// Public entry point for the isoline domain.
-// Processes the two tess factors, then builds the point list and the index list.
-// A culled patch ends with m_NumPoints == 0 and m_NumIndices == 0.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::TessellateIsoLineDomain( float TessFactor_V_LineDensity, float TessFactor_U_LineDetail )
-{
-    PROCESSED_TESS_FACTORS_ISOLINE isoFactors;
-    IsoLineProcessTessFactors(TessFactor_V_LineDensity, TessFactor_U_LineDetail, isoFactors);
-
-    if( !isoFactors.bPatchCulled )
-    {
-        IsoLineGeneratePoints(isoFactors);
-        IsoLineGenerateConnectivity(isoFactors); // does not depend on the point pass; could run in parallel with it
-        return;
-    }
-
-    // Culled patch: report an empty result.
-    m_NumPoints = 0;
-    m_NumIndices = 0;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::IsoLineProcessTessFactors
-// Validates and clamps the two isoline tess factors, fills processedTessFactors (cull flag, parities,
-// tess factor contexts, points per line, line count) and sets m_NumPoints / m_NumIndices for the patch.
-// On a culled patch only bPatchCulled is written.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::IsoLineProcessTessFactors( float TessFactor_V_LineDensity, float TessFactor_U_LineDetail,
-                                                PROCESSED_TESS_FACTORS_ISOLINE& processedTessFactors )
-{
-    // The patch survives only if both factors are strictly positive.
-    // A NaN factor fails the > comparison, so it is culled as well.
-    processedTessFactors.bPatchCulled = !( (TessFactor_V_LineDensity > 0) && (TessFactor_U_LineDetail > 0) );
-    if( processedTessFactors.bPatchCulled )
-    {
-        return;
-    }
-
-    // Clamp range for the line-detail factor, chosen from the partitioning mode originally requested.
-    float detailMin = 0.0;
-    float detailMax = 0.0;
-    switch(m_originalPartitioning)
-    {
-        case D3D11_TESSELLATOR_PARTITIONING_INTEGER:
-        case D3D11_TESSELLATOR_PARTITIONING_POW2: // don't care about pow2 distinction for validation, just treat as integer
-            detailMin = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
-            detailMax = D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
-            break;
-
-        case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
-            detailMin = D3D11_TESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR;
-            detailMax = D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR;
-            break;
-
-        case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
-            detailMin = D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR;
-            detailMax = D3D11_TESSELLATOR_MAX_ODD_TESSELLATION_FACTOR;
-            break;
-    }
-
-    // Line density has its own fixed clamp range; line detail uses the range selected above.
-    TessFactor_V_LineDensity = tess_fmin( D3D11_TESSELLATOR_MAX_ISOLINE_DENSITY_TESSELLATION_FACTOR,
-                                          tess_fmax( D3D11_TESSELLATOR_MIN_ISOLINE_DENSITY_TESSELLATION_FACTOR, TessFactor_V_LineDensity ) );
-    TessFactor_U_LineDetail = tess_fmin( detailMax, tess_fmax( detailMin, TessFactor_U_LineDetail ) );
-
-    // Start from empty vertex and index buffers. Storage is sized for the max tessFactor.
-    m_NumPoints = 0;
-    m_NumIndices = 0;
-
-    // --- Line detail (U): points along each line ---
-    if( HWIntegerPartitioning() )
-    {
-        // Integer partitioning rounds the factor up and derives the parity from the rounded value.
-        TessFactor_U_LineDetail = ceil(TessFactor_U_LineDetail);
-        processedTessFactors.lineDetailParity = isEven(TessFactor_U_LineDetail) ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
-    }
-    else
-    {
-        processedTessFactors.lineDetailParity = m_originalParity;
-    }
-
-    FXP fxpDetail = floatToFixed(TessFactor_U_LineDetail);
-
-    // The parity must be in place before the context and the point count are computed.
-    SetTessellationParity(processedTessFactors.lineDetailParity);
-
-    ComputeTessFactorContext(fxpDetail, processedTessFactors.lineDetailTessFactorCtx);
-    processedTessFactors.numPointsPerLine = NumPointsForTessFactor(fxpDetail);
-
-    // --- Line density (V): number of lines; always handled as integer partitioning ---
-    OverridePartitioning(D3D11_TESSELLATOR_PARTITIONING_INTEGER);
-
-    TessFactor_V_LineDensity = ceil(TessFactor_V_LineDensity);
-    processedTessFactors.lineDensityParity = isEven(TessFactor_V_LineDensity) ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
-    SetTessellationParity(processedTessFactors.lineDensityParity);
-    FXP fxpDensity = floatToFixed(TessFactor_V_LineDensity);
-    ComputeTessFactorContext(fxpDensity, processedTessFactors.lineDensityTessFactorCtx);
-
-    // One fewer than the 1D point count: the last line, at V == 1, is not drawn.
-    processedTessFactors.numLines = NumPointsForTessFactor(fxpDensity) - 1;
-
-    RestorePartitioning();
-
-    // --- Totals ---
-    m_NumPoints = processedTessFactors.numPointsPerLine * processedTessFactors.numLines;
-
-    // Point output uses one index per point; line output uses two indices per segment.
-    const bool bPointOutput = ( m_outputPrimitive == D3D11_TESSELLATOR_OUTPUT_POINT );
-    m_NumIndices = bPointOutput ? m_NumPoints
-                                : processedTessFactors.numLines*(processedTessFactors.numPointsPerLine-1)*2;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::IsoLineGeneratePoints
-// Emits numLines * numPointsPerLine points, line by line, through DefinePoint().
-// V comes from the line-density context, U from the line-detail context.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::IsoLineGeneratePoints( const PROCESSED_TESS_FACTORS_ISOLINE& processedTessFactors )
-{
-    int slot = 0; // storage offset of the next point
-    for( int row = 0; row < processedTessFactors.numLines; ++row )
-    {
-        for( int col = 0; col < processedTessFactors.numPointsPerLine; ++col, ++slot )
-        {
-            // Each PlacePointIn1D() call is preceded by selecting the parity that belongs to its context.
-            FXP fxpV;
-            SetTessellationParity(processedTessFactors.lineDensityParity);
-            PlacePointIn1D(processedTessFactors.lineDensityTessFactorCtx, row, fxpV);
-
-            FXP fxpU;
-            SetTessellationParity(processedTessFactors.lineDetailParity);
-            PlacePointIn1D(processedTessFactors.lineDetailTessFactorCtx, col, fxpU);
-
-            DefinePoint(fxpU, fxpV, slot);
-        }
-    }
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::IsoLineGenerateConnectivity
-// Writes the index list for an isoline patch through DefineIndex().
-// Point output: one index per point, in point order.
-// Line output: one (previous, current) index pair for every point after the first on each line.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::IsoLineGenerateConnectivity( const PROCESSED_TESS_FACTORS_ISOLINE& processedTessFactors )
-{
-    const bool bPointOutput = ( m_outputPrimitive == D3D11_TESSELLATOR_OUTPUT_POINT );
-
-    int vertex = 0; // point being visited
-    int slot = 0;   // next free position in the index list
-    for( int row = 0; row < processedTessFactors.numLines; ++row )
-    {
-        for( int col = 0; col < processedTessFactors.numPointsPerLine; ++col )
-        {
-            if( bPointOutput )
-            {
-                DefineIndex(vertex, slot++);
-            }
-            else if( col > 0 )
-            {
-                // Segment from the previous point on this line to the current one.
-                DefineIndex(vertex-1, slot++);
-                DefineIndex(vertex, slot++);
-            }
-            ++vertex;
-        }
-    }
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::GetPointCount
-// Public accessor: returns the current point count (m_NumPoints).
-//---------------------------------------------------------------------------------------------------------------------------------
-int CHWTessellator::GetPointCount()
-{
-    const int pointCount = m_NumPoints;
-    return pointCount;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::GetIndexCount()
-// Public accessor: returns the current index count (m_NumIndices).
-//---------------------------------------------------------------------------------------------------------------------------------
-int CHWTessellator::GetIndexCount()
-{
-    const int indexCount = m_NumIndices;
-    return indexCount;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::GetPoints()
-// Public accessor: returns the point storage (m_Point). Pair with GetPointCount() for the valid length.
-//---------------------------------------------------------------------------------------------------------------------------------
-DOMAIN_POINT* CHWTessellator::GetPoints()
-{
-    DOMAIN_POINT* const points = m_Point;
-    return points;
-}
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::GetIndices()
-// Public accessor: returns the index storage (m_Index). Pair with GetIndexCount() for the valid length.
-//---------------------------------------------------------------------------------------------------------------------------------
-int* CHWTessellator::GetIndices()
-{
-    int* const indices = m_Index;
-    return indices;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::DefinePoint()
-// Converts a fixed-point (U,V) pair to float and stores it in m_Point[pointStorageOffset].
-// Returns pointStorageOffset unchanged.
-//---------------------------------------------------------------------------------------------------------------------------------
-int CHWTessellator::DefinePoint(FXP fxpU, FXP fxpV, int pointStorageOffset)
-{
-    DOMAIN_POINT& target = m_Point[pointStorageOffset];
-    target.u = fixedToFloat(fxpU);
-    target.v = fixedToFloat(fxpV);
-    return pointStorageOffset;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::DefineIndex()
-// Passes index through PatchIndexValue() and stores the result in m_Index[indexStorageOffset].
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::DefineIndex(int index, int indexStorageOffset)
-{
-    m_Index[indexStorageOffset] = PatchIndexValue(index);
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::DefineClockwiseTriangle()
-// Takes a triangle given in clockwise order and writes its three indices at indexStorageBaseOffset..+2.
-// The order is kept for D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CW; otherwise the last two indices are swapped.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::DefineClockwiseTriangle(int index0, int index1, int index2, int indexStorageBaseOffset)
-{
-    // The first index is the same for either winding.
-    DefineIndex(index0, indexStorageBaseOffset);
-
-    const bool bKeepOrder = ( m_outputPrimitive == D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CW );
-    const int second = bKeepOrder ? index1 : index2;
-    const int third  = bKeepOrder ? index2 : index1;
-
-    DefineIndex(second, indexStorageBaseOffset+1);
-    DefineIndex(third, indexStorageBaseOffset+2);
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::DumpAllPoints()
-// Appends one index per point (0 .. m_NumPoints-1) to the index list, advancing m_NumIndices each time.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::DumpAllPoints()
-{
-    int point = 0;
-    while( point < m_NumPoints )
-    {
-        DefineIndex(point, m_NumIndices++);
-        ++point;
-    }
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::DumpAllPointsAsInOrderLineList()
-// Appends a line list that joins every point to its successor: (0,1), (1,2), ... (m_NumPoints-2, m_NumPoints-1).
-// Each segment adds two indices and advances m_NumIndices by two.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::DumpAllPointsAsInOrderLineList()
-{
-    for( int from = 0; from + 1 < m_NumPoints; ++from )
-    {
-        DefineIndex(from, m_NumIndices++);
-        DefineIndex(from + 1, m_NumIndices++);
-    }
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// RemoveMSB
-// Returns val with its most significant set bit cleared.
-// Only one 8-bit window is scanned, chosen from val's magnitude:
-//   val <= 0xff -> bits 7..0, val <= 0xffff -> bits 15..8, val <= 0xffffff -> bits 23..16, otherwise bits 31..24.
-// Returns 0 when no bit in the scanned window is set (e.g. val == 0).
-// Negative values compare <= 0xff, so only their low byte is examined (same as before this change).
-//---------------------------------------------------------------------------------------------------------------------------------
-int RemoveMSB(int val)
-{
-    // The probe mask is unsigned: a signed mask starting at 0x80000000 is negative, and right-shifting a
-    // negative value is implementation-defined (an arithmetic shift smears the sign bit into the mask).
-    unsigned int check;
-    if( val <= 0x0000ffff ) { check = ( val <= 0x000000ff ) ? 0x00000080u : 0x00008000u; }
-    else                    { check = ( val <= 0x00ffffff ) ? 0x00800000u : 0x80000000u; }
-
-    for( int i = 0; i < 8; i++, check >>= 1 )
-    {
-        if( (unsigned int)val & check )
-        {
-            // A match on bit 31 is impossible here: the top window is only used for positive val.
-            return val & ~(int)check;
-        }
-    }
-    return 0;
-}
-//---------------------------------------------------------------------------------------------------------------------------------
-// GetMSB
-// Returns a mask holding only the most significant set bit of val.
-// Only one 8-bit window is scanned, chosen from val's magnitude:
-//   val <= 0xff -> bits 7..0, val <= 0xffff -> bits 15..8, val <= 0xffffff -> bits 23..16, otherwise bits 31..24.
-// Returns 0 when no bit in the scanned window is set (e.g. val == 0).
-// Negative values compare <= 0xff, so only their low byte is examined (same as before this change).
-//---------------------------------------------------------------------------------------------------------------------------------
-int GetMSB(int val)
-{
-    // The probe mask is unsigned: a signed mask starting at 0x80000000 is negative, and right-shifting a
-    // negative value is implementation-defined. With an arithmetic shift the old code returned a
-    // sign-smeared mask (e.g. 0xFF000000 for val == 0x01000000) instead of the single top bit.
-    unsigned int check;
-    if( val <= 0x0000ffff ) { check = ( val <= 0x000000ff ) ? 0x00000080u : 0x00008000u; }
-    else                    { check = ( val <= 0x00ffffff ) ? 0x00800000u : 0x80000000u; }
-
-    for( int i = 0; i < 8; i++, check >>= 1 )
-    {
-        if( (unsigned int)val & check )
-        {
-            // A match on bit 31 is impossible here (the top window is only used for positive val),
-            // so the mask always fits in int.
-            return (int)check;
-        }
-    }
-    return 0;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::CleanseParameter()
-//---------------------------------------------------------------------------------------------------------------------------------
-/* NOTHING TO DO FOR FIXED POINT ARITHMETIC!
-void CHWTessellator::CleanseParameter(float& parameter)
-{
- // Clean up [0..1] parameter to guarantee that (1 - (1 - parameter)) == parameter.
- parameter = 1.0f - parameter;
- parameter = 1.0f - parameter;
-
-}
-*/
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::NumPointsForTessFactor()
-// Returns the number of 1D points for a fixed-point tess factor under the current parity.
-//   Odd():     2 * ceil(1/2 + factor/2)
-//   otherwise: 2 * ceil(factor/2) + 1
-//---------------------------------------------------------------------------------------------------------------------------------
-int CHWTessellator::NumPointsForTessFactor( FXP fxpTessFactor )
-{
-    if( Odd() )
-    {
-        return (fxpCeil(FXP_ONE_HALF + (fxpTessFactor+1/*round*/)/2)*2)>>FXP_FRACTION_BITS;
-    }
-
-    // The extra point added here is the +1 term of the non-odd formula above.
-    return ((fxpCeil((fxpTessFactor+1/*round*/)/2)*2)>>FXP_FRACTION_BITS)+1;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::ComputeTessFactorContext() - fills TessFactorCtx (half tess factor fraction, half point count, split point, reciprocal segment counts) from a fixed-point tess factor under the current parity (Odd()).
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::ComputeTessFactorContext( FXP fxpTessFactor, TESS_FACTOR_CONTEXT& TessFactorCtx )
-{
- FXP fxpHalfTessFactor = (fxpTessFactor+1/*round*/)/2; // half the tess factor, still in fixed point
- if( Odd() || (fxpHalfTessFactor == FXP_ONE_HALF)) // fxpHalfTessFactor == 1/2 if TessFactor is 1, but we're pretending we are even.
- {
- fxpHalfTessFactor += FXP_ONE_HALF;
- }
- FXP fxpFloorHalfTessFactor = fxpFloor(fxpHalfTessFactor);
- FXP fxpCeilHalfTessFactor = fxpCeil(fxpHalfTessFactor);
- TessFactorCtx.fxpHalfTessFactorFraction = fxpHalfTessFactor - fxpFloorHalfTessFactor; // fractional part; PlacePointIn1D uses it as the lerp weight between floor and ceil placements
- //CleanseParameter(TessFactorCtx.fxpHalfTessFactorFraction);
- TessFactorCtx.numHalfTessFactorPoints = (fxpCeilHalfTessFactor>>FXP_FRACTION_BITS); // for EVEN, we don't include the point always fixed at the midpoint of the TessFactor
- if( fxpCeilHalfTessFactor == fxpFloorHalfTessFactor ) // half tess factor is a whole number, so floor and ceil layouts coincide
- {
- TessFactorCtx.splitPointOnFloorHalfTessFactor = /*pick value to cause this to be ignored*/ TessFactorCtx.numHalfTessFactorPoints+1;
- }
- else if( Odd() )
- {
- if( fxpFloorHalfTessFactor == FXP_ONE )
- {
- TessFactorCtx.splitPointOnFloorHalfTessFactor = 0;
- }
- else
- {
-#ifdef ALLOW_XBOX_360_COMPARISON
- if( m_bXBox360Mode )
- TessFactorCtx.splitPointOnFloorHalfTessFactor = TessFactorCtx.numHalfTessFactorPoints-2;
- else
-#endif
- TessFactorCtx.splitPointOnFloorHalfTessFactor = (RemoveMSB((fxpFloorHalfTessFactor>>FXP_FRACTION_BITS)-1)<<1) + 1; // split index derived from (integer floor - 1) with its top set bit removed
- }
- }
- else
- {
-#ifdef ALLOW_XBOX_360_COMPARISON
- if( m_bXBox360Mode )
- TessFactorCtx.splitPointOnFloorHalfTessFactor = TessFactorCtx.numHalfTessFactorPoints-1;
- else
-#endif
- TessFactorCtx.splitPointOnFloorHalfTessFactor = (RemoveMSB(fxpFloorHalfTessFactor>>FXP_FRACTION_BITS)<<1) + 1; // same derivation as the odd case, without the -1
- }
- int numFloorSegments = (fxpFloorHalfTessFactor * 2)>>FXP_FRACTION_BITS; // whole-edge segment counts as plain integers
- int numCeilSegments = (fxpCeilHalfTessFactor * 2)>>FXP_FRACTION_BITS;
- if( Odd() ) // odd parity uses one segment fewer on both the floor and the ceil layout
- {
- numFloorSegments -= 1;
- numCeilSegments -= 1;
- }
- TessFactorCtx.fxpInvNumSegmentsOnFloorTessFactor = s_fixedReciprocal[numFloorSegments]; // NOTE(review): s_fixedReciprocal is defined outside this block; presumably entry n is 1/n in fixed point - confirm, along with the table bounds
- TessFactorCtx.fxpInvNumSegmentsOnCeilTessFactor = s_fixedReciprocal[numCeilSegments];
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::PlacePointIn1D() - writes to fxpLocation the fixed-point location of 1D point index 'point' for the given tess factor context; indices in the second half are mirrored onto the first half and the result complemented (FXP_ONE - x).
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::PlacePointIn1D( const TESS_FACTOR_CONTEXT& TessFactorCtx, int point, FXP& fxpLocation )
-{
- bool bFlip; // true when 'point' was mirrored and the computed location must be complemented at the end
- if( point >= TessFactorCtx.numHalfTessFactorPoints )
- {
- point = (TessFactorCtx.numHalfTessFactorPoints << 1) - point; // reflect the index about the middle
- if( Odd() )
- {
- point -= 1;
- }
- bFlip = true;
- }
- else
- {
- bFlip = false;
- }
- if( point == TessFactorCtx.numHalfTessFactorPoints ) // only reachable after mirroring, when the incoming index was exactly numHalfTessFactorPoints and parity is not odd
- {
- fxpLocation = FXP_ONE_HALF; // special casing middle since 16 bit fixed math below can't reproduce 0.5 exactly
- return;
- }
- unsigned int indexOnCeilHalfTessFactor = point;
- unsigned int indexOnFloorHalfTessFactor = indexOnCeilHalfTessFactor;
- if( point > TessFactorCtx.splitPointOnFloorHalfTessFactor ) // past the split point the floor layout has one point fewer, so its index lags by one
- {
- indexOnFloorHalfTessFactor -= 1;
- }
- // For the fixed point multiplies below, we know the results are <= 16 bits because
- // the locations on the halfTessFactor are <= half the number of segments for the total TessFactor.
- // So a number divided by a number that is at least twice as big will give
- // a result no bigger than 0.5 (which in fixed point is 16 bits in our case)
- FXP fxpLocationOnFloorHalfTessFactor = indexOnFloorHalfTessFactor * TessFactorCtx.fxpInvNumSegmentsOnFloorTessFactor;
- FXP fxpLocationOnCeilHalfTessFactor = indexOnCeilHalfTessFactor * TessFactorCtx.fxpInvNumSegmentsOnCeilTessFactor;
-
- // Since we know the numbers calculated above are <= fixed point 0.5, and the equation
- // below is just lerping between two values <= fixed point 0.5 (0x00008000), then we know
- // that the final result before shifting by 16 bits is no larger than 0x80000000. Once we
- // shift that down by 16, we get the result of lerping 2 numbers <= 0.5, which is obviously
- // at most 0.5 (0x00008000)
- fxpLocation = fxpLocationOnFloorHalfTessFactor * (FXP_ONE - TessFactorCtx.fxpHalfTessFactorFraction) +
- fxpLocationOnCeilHalfTessFactor * (TessFactorCtx.fxpHalfTessFactorFraction);
- fxpLocation = (fxpLocation + FXP_ONE_HALF/*round*/) >> FXP_FRACTION_BITS; // get back to n.16
- /* Commenting out floating point version. Note the parameter cleansing it does is not needed in fixed point.
- if( bFlip )
- location = 1.0f - location; // complement produces cleansed result.
- else
- CleanseParameter(location);
- */
- if( bFlip ) // mirrored index: report the location measured from the other end
- {
- fxpLocation = FXP_ONE - fxpLocation;
- }
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::StitchRegular - emits triangles (via DefineClockwiseTriangle) joining a run of inside-edge points to the facing outside-edge points, writing indices from baseIndexOffset; 'diagonals' picks how each quad is split and bTrapezoid adds one extra triangle at each end.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::StitchRegular(bool bTrapezoid,DIAGONALS diagonals,
- int baseIndexOffset, int numInsideEdgePoints,
- int insideEdgePointBaseOffset, int outsideEdgePointBaseOffset)
-{
- int insidePoint = insideEdgePointBaseOffset; // cursors that walk the two edges in step
- int outsidePoint = outsideEdgePointBaseOffset;
- if( bTrapezoid ) // leading end triangle; it uses up the first outside point
- {
- DefineClockwiseTriangle(outsidePoint,outsidePoint+1,insidePoint,baseIndexOffset);
- baseIndexOffset += 3; outsidePoint++;
- }
- int p;
- switch( diagonals )
- {
- case DIAGONALS_INSIDE_TO_OUTSIDE:
- // Diagonals pointing from inside edge forward towards outside edge
- for( p = 0; p < numInsideEdgePoints-1; p++ ) // two triangles per quad, one quad per inside-edge segment
- {
- DefineClockwiseTriangle(insidePoint,outsidePoint,outsidePoint+1,baseIndexOffset);
- baseIndexOffset += 3;
-
- DefineClockwiseTriangle(insidePoint,outsidePoint+1,insidePoint+1,baseIndexOffset);
- baseIndexOffset += 3;
- insidePoint++; outsidePoint++;
- }
- break;
- case DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE: // Assumes ODD tessellation
- // Diagonals pointing from outside edge forward towards inside edge
-
- // First half
- for( p = 0; p < numInsideEdgePoints/2-1; p++ )
- {
- DefineClockwiseTriangle(outsidePoint,outsidePoint+1,insidePoint,baseIndexOffset);
- baseIndexOffset += 3;
- DefineClockwiseTriangle(insidePoint,outsidePoint+1,insidePoint+1,baseIndexOffset);
- baseIndexOffset += 3;
- insidePoint++; outsidePoint++;
- }
-
- // Middle
- DefineClockwiseTriangle(outsidePoint,insidePoint+1,insidePoint,baseIndexOffset); // the middle quad is split along the other diagonal
- baseIndexOffset += 3;
- DefineClockwiseTriangle(outsidePoint,outsidePoint+1,insidePoint+1,baseIndexOffset);
- baseIndexOffset += 3;
- insidePoint++; outsidePoint++; p+=2;
-
- // Second half
- for( ; p < numInsideEdgePoints; p++ )
- {
- DefineClockwiseTriangle(outsidePoint,outsidePoint+1,insidePoint,baseIndexOffset);
- baseIndexOffset += 3;
- DefineClockwiseTriangle(insidePoint,outsidePoint+1,insidePoint+1,baseIndexOffset);
- baseIndexOffset += 3;
- insidePoint++; outsidePoint++;
- }
- break;
- case DIAGONALS_MIRRORED:
- // First half, diagonals pointing from outside of outside edge to inside of inside edge
- for( p = 0; p < numInsideEdgePoints/2; p++ )
- {
- DefineClockwiseTriangle(outsidePoint,insidePoint+1,insidePoint,baseIndexOffset);
- baseIndexOffset += 3;
- DefineClockwiseTriangle(outsidePoint,outsidePoint+1,insidePoint+1,baseIndexOffset);
- baseIndexOffset += 3;
- insidePoint++; outsidePoint++;
- }
- // Second half, diagonals pointing from inside of inside edge to outside of outside edge
- for( ; p < numInsideEdgePoints-1; p++ ) // p carries over from the first half
- {
- DefineClockwiseTriangle(insidePoint,outsidePoint,outsidePoint+1,baseIndexOffset);
- baseIndexOffset += 3;
- DefineClockwiseTriangle(insidePoint,outsidePoint+1,insidePoint+1,baseIndexOffset);
- baseIndexOffset += 3;
- insidePoint++; outsidePoint++;
- }
- break;
- }
- if( bTrapezoid ) // trailing end triangle
- {
- DefineClockwiseTriangle(outsidePoint,outsidePoint+1,insidePoint,baseIndexOffset);
- baseIndexOffset += 3; // updates the local copy only: baseIndexOffset is passed by value
- }
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::StitchTransition() - emits the clockwise triangles that stitch a row of inside-edge points to a row of outside-edge points.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHWTessellator::StitchTransition(int baseIndexOffset, // passed to DefineClockwiseTriangle and advanced by 3 after every triangle
- int insideEdgePointBaseOffset, int insideNumHalfTessFactorPoints, // first inside point; inside half-edge count (decremented below when parity is odd)
- TESSELLATOR_PARITY insideEdgeTessFactorParity,
- int outsideEdgePointBaseOffset, int outsideNumHalfTessFactorPoints, // first outside point; outside half-edge count (decremented below when parity is odd)
- TESSELLATOR_PARITY outsideTessFactorParity
-)
-{
-
-#ifdef ALLOW_XBOX_360_COMPARISON
- // Tables to assist in the stitching of 2 rows of points having arbitrary TessFactors.
- // The stitching order is governed by Ruler Function vertex split ordering (see external documentation).
- //
- // The contents of the finalPointPositionTable are where vertex i [0..32] ends up on the half-edge
- // at the max tessellation amount given ruler-function split order.
- // Recall the other half of an edge is mirrored, so we only need to deal with one half.
- // This table is used to decide when to advance a point on the interior or exterior.
- // It supports odd TessFactor up to 65 and even TessFactor up to 64.
- static const int _finalPointPositionTable[33] =
- { 0, 32, 16, 8, 17, 4, 18, 9, 19, 2, 20, 10, 21, 5, 22, 11, 23,
- 1, 24, 12, 25, 6, 26, 13, 27, 3, 28, 14, 29, 7, 30, 15, 31 };
- // The loopStart and loopEnd tables below just provide optimal loop bounds for the
- // stitching algorithm further below, for any given halfTessFactor.
- // There is probably a better way to encode this...
-
- // loopStart[halfTessFactor] encodes the FIRST entry other than [0] in finalPointPositionTable[] above which is
- // less than halfTessFactor. Exceptions are entry 0 and 1, which are set up to skip the loop.
- static const int _loopStart[33] =
- {1,1,17,9,9,5,5,5,5,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2};
- // loopEnd[halfTessFactor] encodes the LAST entry in finalPointPositionTable[] above which is
- // less than halfTessFactor. Exceptions are entry 0 and 1, which are set up to skip the loop.
- static const int _loopEnd[33] =
- {0,0,17,17,25,25,25,25,29,29,29,29,29,29,29,29,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,32};
- const int* finalPointPositionTable;
- const int* loopStart;
- const int* loopEnd;
- if( m_bXBox360Mode )
- {
- // The XBox360 vertex introduction order is always from the center of the edge.
- // So the final positions of points on the half-edge are this trivial table.
- static const int XBOXfinalPointPositionTable[33] =
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
- 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 };
- // loopStart and loopEnd (meaning described above) also become trivial for XBox360 splitting.
- static const int XBOXloopStart[33] =
- {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
- static const int XBOXloopEnd[33] =
- {0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
-
- finalPointPositionTable = XBOXfinalPointPositionTable;
- loopStart = XBOXloopStart;
- loopEnd = XBOXloopEnd;
- }
- else
- {
- finalPointPositionTable = _finalPointPositionTable;
- loopStart = _loopStart;
- loopEnd =_loopEnd;
- }
-#else
- // Tables to assist in the stitching of 2 rows of points having arbitrary TessFactors.
- // The stitching order is governed by Ruler Function vertex split ordering (see external documentation).
- //
- // The contents of the finalPointPositionTable are where vertex i [0..32] ends up on the half-edge
- // at the max tessellation amount given ruler-function split order.
- // Recall the other half of an edge is mirrored, so we only need to deal with one half.
- // This table is used to decide when to advance a point on the interior or exterior.
- // It supports odd TessFactor up to 65 and even TessFactor up to 64.
- static const int finalPointPositionTable[33] =
- { 0, 32, 16, 8, 17, 4, 18, 9, 19, 2, 20, 10, 21, 5, 22, 11, 23,
- 1, 24, 12, 25, 6, 26, 13, 27, 3, 28, 14, 29, 7, 30, 15, 31 };
-
- // The loopStart and loopEnd tables below just provide optimal loop bounds for the
- // stitching algorithm further below, for any given halfTessFactor.
- // There is probably a better way to encode this...
-
- // loopStart[halfTessFactor] encodes the FIRST entry other than [0] in finalPointPositionTable[] above which is
- // less than halfTessFactor. Exceptions are entry 0 and 1, which are set up to skip the loop.
- static const int loopStart[33] =
- {1,1,17,9,9,5,5,5,5,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2};
- // loopEnd[halfTessFactor] encodes the LAST entry in finalPointPositionTable[] above which is
- // less than halfTessFactor. Exceptions are entry 0 and 1, which are set up to skip the loop.
- static const int loopEnd[33] =
- {0,0,17,17,25,25,25,25,29,29,29,29,29,29,29,29,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,32};
-#endif
- if( TESSELLATOR_PARITY_ODD == insideEdgeTessFactorParity )
- {
- insideNumHalfTessFactorPoints -= 1;
- }
- if( TESSELLATOR_PARITY_ODD == outsideTessFactorParity )
- {
- outsideNumHalfTessFactorPoints -= 1;
- }
- // Walk first half
- int outsidePoint = outsideEdgePointBaseOffset;
- int insidePoint = insideEdgePointBaseOffset;
-
- // iStart,iEnd are a small optimization so the loop below doesn't have to go from 0 up to 31
- int iStart = min(loopStart[insideNumHalfTessFactorPoints],loopStart[outsideNumHalfTessFactorPoints]); // NOTE(review): both counts index 33-entry tables, so they must lie in [0..32] - confirm callers guarantee this
- int iEnd = max(loopEnd[insideNumHalfTessFactorPoints],loopEnd[outsideNumHalfTessFactorPoints]);
-
- if( finalPointPositionTable[0] < outsideNumHalfTessFactorPoints ) // since we don't start the loop at 0 below, we need a special case.
- {
- // Advance outside
- DefineClockwiseTriangle(outsidePoint,outsidePoint+1,insidePoint,baseIndexOffset);
- baseIndexOffset += 3; outsidePoint++;
- }
-
- for(int i = iStart; i <= iEnd; i++)
- {
- if( /*(i>0) && <-- not needed since iStart is never 0*/(finalPointPositionTable[i] < insideNumHalfTessFactorPoints))
- {
- // Advance inside
- DefineClockwiseTriangle(insidePoint,outsidePoint,insidePoint+1,baseIndexOffset);
- baseIndexOffset += 3; insidePoint++;
- }
- if((finalPointPositionTable[i] < outsideNumHalfTessFactorPoints))
- {
- // Advance outside
- DefineClockwiseTriangle(outsidePoint,outsidePoint+1,insidePoint,baseIndexOffset);
- baseIndexOffset += 3; outsidePoint++;
- }
- }
-
- if( (insideEdgeTessFactorParity != outsideTessFactorParity) || (insideEdgeTessFactorParity == TESSELLATOR_PARITY_ODD)) // middle section: skipped only when the parities match and the inside parity is not odd
- {
- if( insideEdgeTessFactorParity == outsideTessFactorParity )
- {
- // Quad in the middle
- DefineClockwiseTriangle(insidePoint,outsidePoint,insidePoint+1,baseIndexOffset);
- baseIndexOffset += 3;
- DefineClockwiseTriangle(insidePoint+1,outsidePoint,outsidePoint+1,baseIndexOffset);
- baseIndexOffset += 3;
- insidePoint++;
- outsidePoint++;
- }
- else if( TESSELLATOR_PARITY_EVEN == insideEdgeTessFactorParity )
- {
- // Triangle pointing inside
- DefineClockwiseTriangle(insidePoint,outsidePoint,outsidePoint+1,baseIndexOffset);
- baseIndexOffset += 3;
- outsidePoint++;
- }
- else
- {
- // Triangle pointing outside
- DefineClockwiseTriangle(insidePoint,outsidePoint,insidePoint+1,baseIndexOffset);
- baseIndexOffset += 3;
- insidePoint++;
- }
- }
-
- // Walk second half: same table, traversed in reverse, with the outside/inside tests in the opposite order to the first half.
- for(int i = iEnd; i >= iStart; i--)
- {
- if((finalPointPositionTable[i] < outsideNumHalfTessFactorPoints))
- {
- // Advance outside
- DefineClockwiseTriangle(outsidePoint,outsidePoint+1,insidePoint,baseIndexOffset);
- baseIndexOffset += 3; outsidePoint++;
- }
- if( /*(i>0) && <-- not needed since iStart is never 0*/ (finalPointPositionTable[i] < insideNumHalfTessFactorPoints))
- {
- // Advance inside
- DefineClockwiseTriangle(insidePoint,outsidePoint,insidePoint+1,baseIndexOffset);
- baseIndexOffset += 3; insidePoint++;
- }
- }
- // The case below would not be needed if the loop above were not optimized and simply ran from 31 down to 0.
- if((finalPointPositionTable[0] < outsideNumHalfTessFactorPoints))
- {
- DefineClockwiseTriangle(outsidePoint,outsidePoint+1,insidePoint,baseIndexOffset);
- baseIndexOffset += 3; outsidePoint++;
- }
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHWTessellator::PatchIndexValue() - remaps 'index' through whichever index-patch context is active (if any) and returns the result.
-//--------------------------------------------------------------------------------------------------------------------------------
-int CHWTessellator::PatchIndexValue(int index)
-{
- if( m_bUsingPatchedIndices ) // first scheme: separate bad-value replacement and delta for outside vs. inside points
- {
- if( index >= m_IndexPatchContext.outsidePointIndexPatchBase ) // assumes remapped outside indices are > remapped inside indices
- {
- if( index == m_IndexPatchContext.outsidePointIndexBadValue )
- index = m_IndexPatchContext.outsidePointIndexReplacementValue;
- else
- index += m_IndexPatchContext.outsidePointIndexDeltaToRealValue;
- }
- else
- {
- if( index == m_IndexPatchContext.insidePointIndexBadValue )
- index = m_IndexPatchContext.insidePointIndexReplacementValue;
- else
- index += m_IndexPatchContext.insidePointIndexDeltaToRealValue;
- }
- }
- else if( m_bUsingPatchedIndices2 ) // second scheme: only consulted when the first flag is clear
- {
- if( index >= m_IndexPatchContext2.baseIndexToInvert )
- {
- if( index == m_IndexPatchContext2.cornerCaseBadValue )
- {
- index = m_IndexPatchContext2.cornerCaseReplacementValue;
- }
- else
- {
- index = m_IndexPatchContext2.indexInversionEndPoint - index; // mirror the index about the inversion end point
- }
- }
- else if( index == m_IndexPatchContext2.cornerCaseBadValue )
- {
- index = m_IndexPatchContext2.cornerCaseReplacementValue;
- }
- }
- return index; // unchanged when neither patch flag is set
-}
-
-
-//=================================================================================================================================
-// CHLSLTessellator
-//=================================================================================================================================
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHLSLTessellator::CHLSLTessellator - zeroes all six cached m_LastComputedTessFactors entries.
-//---------------------------------------------------------------------------------------------------------------------------------
-CHLSLTessellator::CHLSLTessellator()
-{
- m_LastComputedTessFactors[0] = m_LastComputedTessFactors[1] = m_LastComputedTessFactors[2] =
- m_LastComputedTessFactors[3] = m_LastComputedTessFactors[4] = m_LastComputedTessFactors[5] = 0;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHLSLTessellator::Init - forwards partitioning/outputPrimitive to CHWTessellator::Init, then records the HLSL-level settings.
-// User calls this.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHLSLTessellator::Init(
- D3D11_TESSELLATOR_PARTITIONING partitioning,
- D3D11_TESSELLATOR_REDUCTION insideTessFactorReduction,
- D3D11_TESSELLATOR_QUAD_REDUCTION_AXIS quadInsideTessFactorReductionAxis,
- D3D11_TESSELLATOR_OUTPUT_PRIMITIVE outputPrimitive)
-{
- CHWTessellator::Init(partitioning,outputPrimitive);
- m_LastComputedTessFactors[0] = m_LastComputedTessFactors[1] = m_LastComputedTessFactors[2] =
- m_LastComputedTessFactors[3] = m_LastComputedTessFactors[4] = m_LastComputedTessFactors[5] = 0;
- m_partitioning = partitioning;
- m_originalPartitioning = partitioning;
- switch( partitioning )
- {
- case D3D11_TESSELLATOR_PARTITIONING_INTEGER:
- default: // integer and every value not listed below: m_parity is NOT assigned in this function
- break;
- case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
- m_parity = TESSELLATOR_PARITY_ODD;
- break;
- case D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
- m_parity = TESSELLATOR_PARITY_EVEN;
- break;
- }
- m_originalParity = m_parity; // NOTE(review): on the default path this copies whatever m_parity already held - presumably set by CHWTessellator::Init; confirm
- m_outputPrimitive = outputPrimitive;
- m_insideTessFactorReduction = insideTessFactorReduction;
- m_quadInsideTessFactorReductionAxis = quadInsideTessFactorReductionAxis;
-}
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHLSLTessellator::TessellateQuadDomain - processes the raw quad factors, then tessellates with the six processed factors.
-// User calls this
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHLSLTessellator::TessellateQuadDomain( float tessFactor_Ueq0, float tessFactor_Veq0, float tessFactor_Ueq1, float tessFactor_Veq1,
- float insideTessFactorScaleU, float insideTessFactorScaleV )
-{
- QuadHLSLProcessTessFactors(tessFactor_Ueq0,tessFactor_Veq0,tessFactor_Ueq1,tessFactor_Veq1,insideTessFactorScaleU,insideTessFactorScaleV); // fills m_LastComputedTessFactors[0..5]
-
- CHWTessellator::TessellateQuadDomain(m_LastComputedTessFactors[0],m_LastComputedTessFactors[1],m_LastComputedTessFactors[2],m_LastComputedTessFactors[3],
- m_LastComputedTessFactors[4],m_LastComputedTessFactors[5]);
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHLSLTessellator::QuadHLSLProcessTessFactors - derives the 4 outside and 2 inside quad factors and stores them in m_LastComputedTessFactors / m_LastUnRoundedComputedTessFactors.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHLSLTessellator::QuadHLSLProcessTessFactors( float tessFactor_Ueq0, float tessFactor_Veq0, float tessFactor_Ueq1, float tessFactor_Veq1,
- float insideTessFactorScaleU, float insideTessFactorScaleV )
-{
- if( !(tessFactor_Ueq0 > 0) ||// true for zero, negative AND NaN inputs (any comparison with NaN is false)
- !(tessFactor_Veq0 > 0) ||
- !(tessFactor_Ueq1 > 0) ||
- !(tessFactor_Veq1 > 0) )
- {
- m_LastUnRoundedComputedTessFactors[0] = tessFactor_Ueq0;
- m_LastUnRoundedComputedTessFactors[1] = tessFactor_Veq0;
- m_LastUnRoundedComputedTessFactors[2] = tessFactor_Ueq1;
- m_LastUnRoundedComputedTessFactors[3] = tessFactor_Veq1;
- m_LastUnRoundedComputedTessFactors[4] = 0;
- m_LastUnRoundedComputedTessFactors[5] = 0;
- m_LastComputedTessFactors[0] =
- m_LastComputedTessFactors[1] =
- m_LastComputedTessFactors[2] =
- m_LastComputedTessFactors[3] =
- m_LastComputedTessFactors[4] =
- m_LastComputedTessFactors[5] = 0;
- return; // all computed factors are zero on this path
- }
-
- CleanupFloatTessFactor(tessFactor_Ueq0);// clamp to [1.0f..INF], NaN->1.0f (a NaN cannot reach here: it took the early return above)
- CleanupFloatTessFactor(tessFactor_Veq0);
- CleanupFloatTessFactor(tessFactor_Ueq1);
- CleanupFloatTessFactor(tessFactor_Veq1);
-
- // Save off tessFactors so they can be returned to app
- m_LastUnRoundedComputedTessFactors[0] = tessFactor_Ueq0;
- m_LastUnRoundedComputedTessFactors[1] = tessFactor_Veq0;
- m_LastUnRoundedComputedTessFactors[2] = tessFactor_Ueq1;
- m_LastUnRoundedComputedTessFactors[3] = tessFactor_Veq1;
-
- // Process outside tessFactors
- float outsideTessFactor[QUAD_EDGES] = {tessFactor_Ueq0, tessFactor_Veq0, tessFactor_Ueq1, tessFactor_Veq1};
- int edge, axis;
- TESSELLATOR_PARITY insideTessFactorParity[QUAD_AXES];
- if( Pow2Partitioning() || IntegerPartitioning() )
- {
- for( edge = 0; edge < QUAD_EDGES; edge++ )
- {
- RoundUpTessFactor(outsideTessFactor[edge]);
- ClampTessFactor(outsideTessFactor[edge]); // clamp unbounded user input based on tessellation mode
- }
- }
- else
- {
- SetTessellationParity(m_originalParity); // ClampTessFactor needs it
- for( edge = 0; edge < QUAD_EDGES; edge++ )
- {
- ClampTessFactor(outsideTessFactor[edge]); // clamp unbounded user input based on tessellation mode
- }
- }
-
- // Compute inside TessFactors
- float insideTessFactor[QUAD_AXES] = {0.0};
- if( m_quadInsideTessFactorReductionAxis == D3D11_TESSELLATOR_QUAD_REDUCTION_1_AXIS )
- {
- switch( m_insideTessFactorReduction ) // 1-axis mode: one reduction over all four edges, shared by U and V
- {
- case D3D11_TESSELLATOR_REDUCTION_MIN:
- insideTessFactor[U] = tess_fmin(tess_fmin(tessFactor_Veq0,tessFactor_Veq1),tess_fmin(tessFactor_Ueq0,tessFactor_Ueq1));
- break;
- case D3D11_TESSELLATOR_REDUCTION_MAX:
- insideTessFactor[U] = tess_fmax(tess_fmax(tessFactor_Veq0,tessFactor_Veq1),tess_fmax(tessFactor_Ueq0,tessFactor_Ueq1));
- break;
- case D3D11_TESSELLATOR_REDUCTION_AVERAGE:
- insideTessFactor[U] = (tessFactor_Veq0 + tessFactor_Veq1 + tessFactor_Ueq0 + tessFactor_Ueq1) / 4;
- break;
- }
- // Scale inside tessFactor based on user scale factor.
-
- ClampFloatTessFactorScale(insideTessFactorScaleU); // clamp scale value to [0..1], NaN->0
- insideTessFactor[U] = insideTessFactor[U]*insideTessFactorScaleU;
-
- // Compute inside parity
- if( Pow2Partitioning() || IntegerPartitioning() )
- {
- ClampTessFactor(insideTessFactor[U]); // clamp reduction + scale result that is based on unbounded user input
- m_LastUnRoundedComputedTessFactors[4] = m_LastUnRoundedComputedTessFactors[5] = insideTessFactor[U]; // Save off TessFactors so they can be returned to app
- RoundUpTessFactor(insideTessFactor[U]);
- insideTessFactorParity[U] =
- insideTessFactorParity[V] =
- (isEven(insideTessFactor[U]) || (FLOAT_ONE == insideTessFactor[U]) )
- ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
- }
- else
- {
- ClampTessFactor(insideTessFactor[U]); // clamp reduction + scale result that is based on unbounded user input
- m_LastUnRoundedComputedTessFactors[4] = m_LastUnRoundedComputedTessFactors[5] = insideTessFactor[U]; // Save off TessFactors so they can be returned to app
- // no parity changes for fractional tessellation - just use what the user requested
- insideTessFactorParity[U] = insideTessFactorParity[V] = m_originalParity;
- }
-
- // To prevent snapping on edges, the "picture frame" comes
- // in using avg or max (and ignore inside TessFactor scaling) until it is at least 3.
- if( (TESSELLATOR_PARITY_ODD == insideTessFactorParity[U]) &&
- (insideTessFactor[U] < FLOAT_THREE) )
- {
- if(D3D11_TESSELLATOR_REDUCTION_MAX == m_insideTessFactorReduction)
- {
- insideTessFactor[U] = tess_fmin(FLOAT_THREE,tess_fmax(tess_fmax(tessFactor_Veq0,tessFactor_Veq1),tess_fmax(tessFactor_Ueq0,tessFactor_Ueq1)));
- }
- else
- {
- insideTessFactor[U] = tess_fmin(FLOAT_THREE,(tessFactor_Veq0 + tessFactor_Veq1 + tessFactor_Ueq0 + tessFactor_Ueq1) / 4);
- }
- ClampTessFactor(insideTessFactor[U]); // clamp reduction result that is based on unbounded user input
- m_LastUnRoundedComputedTessFactors[4] = m_LastUnRoundedComputedTessFactors[5] = insideTessFactor[U]; // Save off TessFactors so they can be returned to app
- if( IntegerPartitioning())
- {
- RoundUpTessFactor(insideTessFactor[U]);
- insideTessFactorParity[U] =
- insideTessFactorParity[V] = isEven(insideTessFactor[U]) ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
- }
- }
- insideTessFactor[V] = insideTessFactor[U];
- }
- else
- {
- switch( m_insideTessFactorReduction ) // per-axis mode: U is reduced from the two V edges, V from the two U edges
- {
- case D3D11_TESSELLATOR_REDUCTION_MIN:
- insideTessFactor[U] = tess_fmin(tessFactor_Veq0,tessFactor_Veq1);
- insideTessFactor[V] = tess_fmin(tessFactor_Ueq0,tessFactor_Ueq1);
- break;
- case D3D11_TESSELLATOR_REDUCTION_MAX:
- insideTessFactor[U] = tess_fmax(tessFactor_Veq0,tessFactor_Veq1);
- insideTessFactor[V] = tess_fmax(tessFactor_Ueq0,tessFactor_Ueq1);
- break;
- case D3D11_TESSELLATOR_REDUCTION_AVERAGE:
- insideTessFactor[U] = (tessFactor_Veq0 + tessFactor_Veq1) / 2;
- insideTessFactor[V] = (tessFactor_Ueq0 + tessFactor_Ueq1) / 2;
- break;
- }
- // Scale inside tessFactors based on user scale factor.
-
- ClampFloatTessFactorScale(insideTessFactorScaleU); // clamp scale value to [0..1], NaN->0
- ClampFloatTessFactorScale(insideTessFactorScaleV);
- insideTessFactor[U] = insideTessFactor[U]*insideTessFactorScaleU;
- insideTessFactor[V] = insideTessFactor[V]*insideTessFactorScaleV;
-
- // Compute inside parity
- if( Pow2Partitioning() || IntegerPartitioning() )
- {
- for( axis = 0; axis < QUAD_AXES; axis++ )
- {
- ClampTessFactor(insideTessFactor[axis]); // clamp reduction + scale result that is based on unbounded user input
- m_LastUnRoundedComputedTessFactors[4+axis] = insideTessFactor[axis]; // Save off TessFactors so they can be returned to app
- RoundUpTessFactor(insideTessFactor[axis]);
- insideTessFactorParity[axis] =
- (isEven(insideTessFactor[axis]) || (FLOAT_ONE == insideTessFactor[axis]) )
- ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
- }
- }
- else
- {
- ClampTessFactor(insideTessFactor[U]); // clamp reduction + scale result that is based on unbounded user input
- ClampTessFactor(insideTessFactor[V]); // clamp reduction + scale result that is based on unbounded user input
- m_LastUnRoundedComputedTessFactors[4] = insideTessFactor[U]; // Save off TessFactors so they can be returned to app
- m_LastUnRoundedComputedTessFactors[5] = insideTessFactor[V]; // Save off TessFactors so they can be returned to app
- // no parity changes for fractional tessellation - just use what the user requested
- insideTessFactorParity[U] = insideTessFactorParity[V] = m_originalParity;
- }
-
- // To prevent snapping on edges, the "picture frame" comes
- // in using avg or max (and ignore inside TessFactor scaling) until it is at least 3.
- if( (TESSELLATOR_PARITY_ODD == insideTessFactorParity[U]) &&
- (insideTessFactor[U] < FLOAT_THREE) )
- {
- if(D3D11_TESSELLATOR_REDUCTION_MAX == m_insideTessFactorReduction)
- {
- insideTessFactor[U] = tess_fmin(FLOAT_THREE,tess_fmax(tessFactor_Veq0,tessFactor_Veq1));
- }
- else
- {
- insideTessFactor[U] = tess_fmin(FLOAT_THREE,(tessFactor_Veq0 + tessFactor_Veq1) / 2);
- }
- ClampTessFactor(insideTessFactor[U]); // clamp reduction result that is based on unbounded user input
- m_LastUnRoundedComputedTessFactors[4] = insideTessFactor[U]; // Save off TessFactors so they can be returned to app
- if( IntegerPartitioning())
- {
- RoundUpTessFactor(insideTessFactor[U]);
- insideTessFactorParity[U] = isEven(insideTessFactor[U]) ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
- }
- }
-
- if( (TESSELLATOR_PARITY_ODD == insideTessFactorParity[V]) &&
- (insideTessFactor[V] < FLOAT_THREE) )
- {
- if(D3D11_TESSELLATOR_REDUCTION_MAX == m_insideTessFactorReduction)
- {
- insideTessFactor[V] = tess_fmin(FLOAT_THREE,tess_fmax(tessFactor_Ueq0,tessFactor_Ueq1));
- }
- else
- {
- insideTessFactor[V] = tess_fmin(FLOAT_THREE,(tessFactor_Ueq0 + tessFactor_Ueq1) / 2);
- }
- ClampTessFactor(insideTessFactor[V]);// clamp reduction result that is based on unbounded user input
- m_LastUnRoundedComputedTessFactors[5] = insideTessFactor[V]; // Save off TessFactors so they can be returned to app
- if( IntegerPartitioning())
- {
- RoundUpTessFactor(insideTessFactor[V]);
- insideTessFactorParity[V] = isEven(insideTessFactor[V]) ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
- }
- }
-
- for( axis = 0; axis < QUAD_AXES; axis++ )
- {
- if( TESSELLATOR_PARITY_ODD == insideTessFactorParity[axis] )
- {
- // Ensure the first ring ("picture frame") interpolates in on all sides
- // as much as the side with the minimum TessFactor. Prevents snapping to edge.
- if( (insideTessFactor[axis] < FLOAT_THREE) && (insideTessFactor[axis] < insideTessFactor[(axis+1)&0x1])) // (axis+1)&0x1 selects the other axis
- {
- insideTessFactor[axis] = tess_fmin(insideTessFactor[(axis+1)&0x1],FLOAT_THREE);
- m_LastUnRoundedComputedTessFactors[4+axis] = insideTessFactor[axis]; // Save off TessFactors so they can be returned to app
- }
- }
- }
- }
-
- // Save off TessFactors so they can be returned to app
- m_LastComputedTessFactors[0] = outsideTessFactor[Ueq0];
- m_LastComputedTessFactors[1] = outsideTessFactor[Veq0];
- m_LastComputedTessFactors[2] = outsideTessFactor[Ueq1];
- m_LastComputedTessFactors[3] = outsideTessFactor[Veq1];
- m_LastComputedTessFactors[4] = insideTessFactor[U];
- m_LastComputedTessFactors[5] = insideTessFactor[V];
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHLSLTessellator::TessellateTriDomain - processes the raw triangle factors, then tessellates with the four processed factors.
-// User calls this
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHLSLTessellator::TessellateTriDomain( float tessFactor_Ueq0, float tessFactor_Veq0, float tessFactor_Weq0,
- float insideTessFactorScale )
-{
- TriHLSLProcessTessFactors(tessFactor_Ueq0,tessFactor_Veq0,tessFactor_Weq0,insideTessFactorScale); // fills m_LastComputedTessFactors[0..3]
-
- CHWTessellator::TessellateTriDomain(m_LastComputedTessFactors[0],m_LastComputedTessFactors[1],m_LastComputedTessFactors[2],m_LastComputedTessFactors[3]);
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHLSLTessellator::TriHLSLProcessTessFactors - derives the 3 outside and 1 inside triangle factors and stores them in m_LastComputedTessFactors / m_LastUnRoundedComputedTessFactors.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHLSLTessellator::TriHLSLProcessTessFactors( float tessFactor_Ueq0, float tessFactor_Veq0, float tessFactor_Weq0,
- float insideTessFactorScale )
-{
- if( !(tessFactor_Ueq0 > 0) || // true for zero, negative AND NaN inputs (any comparison with NaN is false)
- !(tessFactor_Veq0 > 0) ||
- !(tessFactor_Weq0 > 0) )
- {
- m_LastUnRoundedComputedTessFactors[0] = tessFactor_Ueq0;
- m_LastUnRoundedComputedTessFactors[1] = tessFactor_Veq0;
- m_LastUnRoundedComputedTessFactors[2] = tessFactor_Weq0;
- m_LastUnRoundedComputedTessFactors[3] =
- m_LastComputedTessFactors[0] =
- m_LastComputedTessFactors[1] =
- m_LastComputedTessFactors[2] =
- m_LastComputedTessFactors[3] = 0;
- return; // all computed factors are zero on this path
- }
-
- CleanupFloatTessFactor(tessFactor_Ueq0); // clamp to [1.0f..INF], NaN->1.0f (a NaN cannot reach here: it took the early return above)
- CleanupFloatTessFactor(tessFactor_Veq0);
- CleanupFloatTessFactor(tessFactor_Weq0);
-
- // Save off TessFactors so they can be returned to app
- m_LastUnRoundedComputedTessFactors[0] = tessFactor_Ueq0;
- m_LastUnRoundedComputedTessFactors[1] = tessFactor_Veq0;
- m_LastUnRoundedComputedTessFactors[2] = tessFactor_Weq0;
-
- // Process outside TessFactors
- float outsideTessFactor[TRI_EDGES] = {tessFactor_Ueq0, tessFactor_Veq0, tessFactor_Weq0};
- int edge;
- if( Pow2Partitioning() || IntegerPartitioning() )
- {
- for( edge = 0; edge < TRI_EDGES; edge++ )
- {
- RoundUpTessFactor(outsideTessFactor[edge]); // for pow2 this rounds to pow2
- ClampTessFactor(outsideTessFactor[edge]); // clamp unbounded user input based on tessellation mode
- }
- }
- else
- { // NOTE(review): QuadHLSLProcessTessFactors calls SetTessellationParity(m_originalParity) before clamping on this path; this function does not - confirm intentional
- for( edge = 0; edge < TRI_EDGES; edge++ )
- {
- ClampTessFactor(outsideTessFactor[edge]); // clamp unbounded user input based on tessellation mode
- }
- }
-
- // Compute inside TessFactor
- float insideTessFactor = 0.0;
- switch( m_insideTessFactorReduction )
- {
- case D3D11_TESSELLATOR_REDUCTION_MIN:
- insideTessFactor = tess_fmin(tess_fmin(tessFactor_Ueq0,tessFactor_Veq0),tessFactor_Weq0);
- break;
- case D3D11_TESSELLATOR_REDUCTION_MAX:
- insideTessFactor = tess_fmax(tess_fmax(tessFactor_Ueq0,tessFactor_Veq0),tessFactor_Weq0);
- break;
- case D3D11_TESSELLATOR_REDUCTION_AVERAGE:
- insideTessFactor = (tessFactor_Ueq0 + tessFactor_Veq0 + tessFactor_Weq0) / 3;
- break;
- }
-
- // Scale inside TessFactor based on user scale factor.
- ClampFloatTessFactorScale(insideTessFactorScale); // clamp scale value to [0..1], NaN->0
- insideTessFactor = insideTessFactor*tess_fmin(FLOAT_ONE,insideTessFactorScale); // NOTE(review): the scale was already clamped to [0..1] on the previous line, so this tess_fmin looks redundant - confirm
-
- ClampTessFactor(insideTessFactor); // clamp reduction + scale result that is based on unbounded user input
- m_LastUnRoundedComputedTessFactors[3] = insideTessFactor;// Save off TessFactors so they can be returned to app
- TESSELLATOR_PARITY parity;
- if( Pow2Partitioning() || IntegerPartitioning() )
- {
- RoundUpTessFactor(insideTessFactor);
- parity = (isEven(insideTessFactor) || (FLOAT_ONE == insideTessFactor))
- ? TESSELLATOR_PARITY_EVEN : TESSELLATOR_PARITY_ODD;
- }
- else
- {
- parity = m_originalParity; // fractional modes keep the parity recorded at Init time
- }
-
- if( (TESSELLATOR_PARITY_ODD == parity) &&
- (insideTessFactor < FLOAT_THREE))
- {
- // To prevent snapping on edges, the "picture frame" comes
- // in using avg or max (and ignore inside TessFactor scaling) until it is at least 3.
- if(D3D11_TESSELLATOR_REDUCTION_MAX == m_insideTessFactorReduction)
- {
- insideTessFactor = tess_fmin(FLOAT_THREE,tess_fmax(tessFactor_Ueq0,tess_fmax(tessFactor_Veq0,tessFactor_Weq0)));
- }
- else
- {
- insideTessFactor = tess_fmin(FLOAT_THREE,(tessFactor_Ueq0 + tessFactor_Veq0 + tessFactor_Weq0) / 3);
- }
- ClampTessFactor(insideTessFactor); // clamp reduction result that is based on unbounded user input
- m_LastUnRoundedComputedTessFactors[3] = insideTessFactor;// Save off TessFactors so they can be returned to app
- if( IntegerPartitioning())
- {
- RoundUpTessFactor(insideTessFactor);
- }
- }
-
- // Save off TessFactors so they can be returned to app
- m_LastComputedTessFactors[0] = outsideTessFactor[Ueq0];
- m_LastComputedTessFactors[1] = outsideTessFactor[Veq0];
- m_LastComputedTessFactors[2] = outsideTessFactor[Weq0];
- m_LastComputedTessFactors[3] = insideTessFactor;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHLSLTessellator::TessellateIsoLineDomain - processes the two raw isoline factors, then tessellates with the processed pair.
-// User calls this.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHLSLTessellator::TessellateIsoLineDomain( float TessFactor_U_LineDetail, float TessFactor_V_LineDensity )
-{
- IsoLineHLSLProcessTessFactors(TessFactor_V_LineDensity,TessFactor_U_LineDetail); // note the argument order: the helper takes (V density, U detail)
- CHWTessellator::TessellateIsoLineDomain(m_LastComputedTessFactors[0],m_LastComputedTessFactors[1]); // [0] = processed V density, [1] = processed U detail
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHLSLTessellator::IsoLineHLSLProcessTessFactors - cleans, clamps and rounds the isoline factors; stores density in slot [0] and detail in slot [1].
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHLSLTessellator::IsoLineHLSLProcessTessFactors( float TessFactor_V_LineDensity, float TessFactor_U_LineDetail )
-{
- if( !(TessFactor_V_LineDensity > 0) || // true for zero, negative AND NaN inputs (any comparison with NaN is false)
- !(TessFactor_U_LineDetail > 0) )
- {
- m_LastUnRoundedComputedTessFactors[0] = TessFactor_V_LineDensity;
- m_LastUnRoundedComputedTessFactors[1] = TessFactor_U_LineDetail;
- m_LastComputedTessFactors[0] =
- m_LastComputedTessFactors[1] = 0;
- return; // both computed factors are zero on this path
- }
-
- CleanupFloatTessFactor(TessFactor_V_LineDensity); // clamp to [1.0f..INF], NaN->1.0f
- CleanupFloatTessFactor(TessFactor_U_LineDetail); // clamp to [1.0f..INF], NaN->1.0f
-
- ClampTessFactor(TessFactor_U_LineDetail); // clamp unbounded user input based on tessellation mode
-
- m_LastUnRoundedComputedTessFactors[1] = TessFactor_U_LineDetail; // Save off TessFactors so they can be returned to app
-
- if(Pow2Partitioning()||IntegerPartitioning())
- {
- RoundUpTessFactor(TessFactor_U_LineDetail);
- }
-
- OverridePartitioning(D3D11_TESSELLATOR_PARTITIONING_INTEGER); // line density is clamped and rounded under integer partitioning, whatever mode is configured
-
- ClampTessFactor(TessFactor_V_LineDensity); // Clamp unbounded user input to integer
- m_LastUnRoundedComputedTessFactors[0] = TessFactor_V_LineDensity; // Save off TessFactors so they can be returned to app
-
- RoundUpTessFactor(TessFactor_V_LineDensity);
-
- RestorePartitioning(); // undo the temporary override above
-
- // Save off TessFactors so they can be returned to app
- m_LastComputedTessFactors[0] = TessFactor_V_LineDensity;
- m_LastComputedTessFactors[1] = TessFactor_U_LineDetail;
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHLSLTessellator::ClampTessFactor() - clamps TessFactor in place to the [min..max] range of the current partitioning mode / parity.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHLSLTessellator::ClampTessFactor(float& TessFactor)
-{
- if( Pow2Partitioning() )
- {
- TessFactor = tess_fmin( D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR, tess_fmax( TessFactor, D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR) ); // pow2: odd minimum, even maximum
- }
- else if( IntegerPartitioning() )
- {
- TessFactor = tess_fmin( D3D11_TESSELLATOR_MAX_TESSELLATION_FACTOR, tess_fmax( TessFactor, D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR) );
- }
- else if( Odd() )
- {
- TessFactor = tess_fmin( D3D11_TESSELLATOR_MAX_ODD_TESSELLATION_FACTOR, tess_fmax( TessFactor, D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR) );
- }
- else // even
- {
- TessFactor = tess_fmin( D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR, tess_fmax( TessFactor, D3D11_TESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR) );
- }
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHLSLTessellator::CleanupFloatTessFactor() - replaces NaN and anything below 1.0f with 1 (in place).
-//---------------------------------------------------------------------------------------------------------------------------------
-static const int exponentMask = 0x7f800000; // exponent field of the float bit pattern (assumes 32-bit IEEE-754 float and 32-bit int)
-static const int mantissaMask = 0x007fffff; // mantissa (fraction) field of the float bit pattern
-void CHLSLTessellator::CleanupFloatTessFactor(float& input)
-{
- // If input is < 1.0f or NaN, clamp to 1.0f.
- // In other words, clamp input to [1.0f...+INF]
- int bits = *(int*)&input; // NOTE(review): reads the float through an int pointer - a strict-aliasing violation in standard C++; a memcpy of the bits would be the portable form
- if( ( ( ( bits & exponentMask ) == exponentMask ) && ( bits & mantissaMask ) ) ||// nan? (all exponent bits set and a non-zero mantissa)
- (input < 1.0f) )
- {
- input = 1;
- }
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHLSLTessellator::ClampFloatTessFactorScale() - clamps a scale value in place to [0..1], mapping NaN to 0.
-//---------------------------------------------------------------------------------------------------------------------------------
-void CHLSLTessellator::ClampFloatTessFactorScale(float& input)
-{
- // If input is < 0.0f or NaN, clamp to 0.0f. > 1 clamps to 1.
- // In other words, clamp input to [0.0f...1.0f]
- int bits = *(int*)&input; // NOTE(review): reads the float through an int pointer - a strict-aliasing violation in standard C++; a memcpy of the bits would be the portable form
- if( ( ( ( bits & exponentMask ) == exponentMask ) && ( bits & mantissaMask ) ) ||// nan? (all exponent bits set and a non-zero mantissa)
- (input < 0.0f) )
- {
- input = 0;
- }
- else if( input > 1 )
- {
- input = 1;
- }
-}
-
-//---------------------------------------------------------------------------------------------------------------------------------
-// CHLSLTessellator::RoundUpTessFactor() - rounds TessFactor up in place: to a power of two for pow2 partitioning, via ceil() for integer partitioning; otherwise untouched.
-//---------------------------------------------------------------------------------------------------------------------------------
-static const int exponentLSB = 0x00800000; // lowest bit of the float exponent field; adding it increments the exponent by one
-void CHLSLTessellator::RoundUpTessFactor(float& TessFactor)
-{
- // Assume TessFactor is in [1.0f..+INF]
- if( Pow2Partitioning() )
- {
- int bits = *(int*)&TessFactor; // NOTE(review): pointer-cast type pun, same strict-aliasing caveat as the other bit tests in this file
- if( bits & mantissaMask ) // non-zero mantissa: the value is not already an exact power of two
- {
- *(int*)&TessFactor = (bits & exponentMask) + exponentLSB; // clear the mantissa and bump the exponent, i.e. the next power of two above the input
- }
- }
- else if( IntegerPartitioning() )
- {
- TessFactor = ceil(TessFactor);
- }
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/tessellator.h b/src/gallium/drivers/swr/rasterizer/core/tessellator.h
deleted file mode 100644
index 30b6b4fca1e..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/tessellator.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/****************************************************************************
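- * Copyright (C) 2014-2019 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation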
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file tessellator.h
- *
- * @brief Tessellator fixed function unit interface definition
- *
- ******************************************************************************/
-#pragma once
-
-#include "tessellator.hpp"
-
-struct SWR_TS_TESSELLATED_DATA // result of one tessellation; filled in by Tessellator::SWR_TS::Tessellate()
-{
- uint32_t NumPrimitives; // index count divided by the indices per primitive (3 for quad/tri domains, 2 for isolines)
- uint32_t NumDomainPoints; // number of valid entries in pDomainPointsU / pDomainPointsV
-
- uint32_t* ppIndices[3]; // ppIndices[k][p] is the k-th vertex index of primitive p; the arrays live inside the SWR_TS object
- float* pDomainPointsU; // U coordinate per domain point; points into the SWR_TS object
- float* pDomainPointsV; // V coordinate per domain point; points into the SWR_TS object
- // For Tri: pDomainPointsW[i] = 1.0f - pDomainPointsU[i] - pDomainPointsV[i]
-};
-
-namespace Tessellator
-{
-    /// Wrapper class for the CHWTessellator reference tessellator from MSFT.
-    /// On top of CHWTessellator it remembers the tessellation domain and keeps
-    /// struct-of-arrays copies of the generated domain points and indices, which
-    /// is the layout handed back to callers through SWR_TS_TESSELLATED_DATA.
-    class SWR_TS : private CHWTessellator
-    {
-    private:
-        typedef CHWTessellator SUPER;
-        SWR_TS_DOMAIN          Domain;
-        OSALIGNSIMD(float) DomainPointsU[MAX_POINT_COUNT];
-        OSALIGNSIMD(float) DomainPointsV[MAX_POINT_COUNT];
-        uint32_t NumDomainPoints;
-        OSALIGNSIMD(uint32_t) Indices[3][MAX_INDEX_COUNT / 3];
-        uint32_t NumIndices;
-
-    public:
-        /// Configure the underlying tessellator and remember the domain used by
-        /// later Tessellate() calls. The SWR enum values index the conversion
-        /// tables directly, so they must be in range (TSInitCtx asserts this).
-        void Init(SWR_TS_DOMAIN          tsDomain,
-                  SWR_TS_PARTITIONING    tsPartitioning,
-                  SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology)
-        {
-            static const D3D11_TESSELLATOR_PARTITIONING CVT_TS_D3D_PARTITIONING[] = {
-                D3D11_TESSELLATOR_PARTITIONING_INTEGER,         // SWR_TS_INTEGER
-                D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD,  // SWR_TS_ODD_FRACTIONAL
-                D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN, // SWR_TS_EVEN_FRACTIONAL
-                D3D11_TESSELLATOR_PARTITIONING_POW2             // SWR_TS_POW2
-            };
-
-            static const D3D11_TESSELLATOR_OUTPUT_PRIMITIVE CVT_TS_D3D_OUTPUT_TOPOLOGY[] = {
-                D3D11_TESSELLATOR_OUTPUT_POINT,        // SWR_TS_OUTPUT_POINT
-                D3D11_TESSELLATOR_OUTPUT_LINE,         // SWR_TS_OUTPUT_LINE
-                D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CCW, // SWR_TS_OUTPUT_TRI_CW - inverted logic, because DX
-                D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CW   // SWR_TS_OUTPUT_TRI_CCW - inverted logic, because DX
-            };
-
-            SUPER::Init(CVT_TS_D3D_PARTITIONING[tsPartitioning],
-                        CVT_TS_D3D_OUTPUT_TOPOLOGY[tsOutputTopology]);
-
-            Domain          = tsDomain;
-            NumDomainPoints = 0;
-            NumIndices      = 0;
-        }
-
-        /// Tessellate one patch of the domain chosen in Init().
-        /// The generated points and indices are copied into this object's arrays
-        /// and tsTessellatedData receives the counts plus pointers to those arrays,
-        /// so its contents are overwritten by the next Tessellate() call.
-        /// For an unrecognised domain an empty result (zero primitives, zero
-        /// domain points) is reported.
-        void Tessellate(const SWR_TESSELLATION_FACTORS& tsTessFactors,
-                        SWR_TS_TESSELLATED_DATA&        tsTessellatedData)
-        {
-            // Number of indices that make up one output primitive
-            uint32_t IndexDiv = 0;
-            switch (Domain)
-            {
-            case SWR_TS_QUAD:
-                IndexDiv = 3;
-                SUPER::TessellateQuadDomain(
-                    tsTessFactors.OuterTessFactors[SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL],
-                    tsTessFactors.OuterTessFactors[SWR_QUAD_V_EQ0_TRI_W],
-                    tsTessFactors.OuterTessFactors[SWR_QUAD_U_EQ1_TRI_V_LINE_DENSITY],
-                    tsTessFactors.OuterTessFactors[SWR_QUAD_V_EQ1],
-                    tsTessFactors.InnerTessFactors[SWR_QUAD_U_TRI_INSIDE],
-                    tsTessFactors.InnerTessFactors[SWR_QUAD_V_INSIDE]);
-                break;
-
-            case SWR_TS_TRI:
-                IndexDiv = 3;
-                SUPER::TessellateTriDomain(
-                    tsTessFactors.OuterTessFactors[SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL],
-                    tsTessFactors.OuterTessFactors[SWR_QUAD_U_EQ1_TRI_V_LINE_DENSITY],
-                    tsTessFactors.OuterTessFactors[SWR_QUAD_V_EQ0_TRI_W],
-                    tsTessFactors.InnerTessFactors[SWR_QUAD_U_TRI_INSIDE]);
-                break;
-
-            case SWR_TS_ISOLINE:
-                IndexDiv = 2;
-                SUPER::TessellateIsoLineDomain(
-                    tsTessFactors.OuterTessFactors[SWR_QUAD_U_EQ1_TRI_V_LINE_DENSITY],
-                    tsTessFactors.OuterTessFactors[SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL]);
-                break;
-
-            default:
-                SWR_INVALID("Invalid Tessellation Domain: %d", Domain);
-                assert(false);
-                // If the two checks above do not stop execution (assert is a no-op
-                // under NDEBUG), IndexDiv is still 0 and the modulo / divide further
-                // down would divide by zero. Report an empty result instead.
-                NumDomainPoints                   = 0;
-                NumIndices                        = 0;
-                tsTessellatedData.NumPrimitives   = 0;
-                tsTessellatedData.NumDomainPoints = 0;
-                tsTessellatedData.pDomainPointsU  = &DomainPointsU[0];
-                tsTessellatedData.pDomainPointsV  = &DomainPointsV[0];
-                tsTessellatedData.ppIndices[0]    = &Indices[0][0];
-                tsTessellatedData.ppIndices[1]    = &Indices[1][0];
-                tsTessellatedData.ppIndices[2]    = &Indices[2][0];
-                return;
-            }
-
-            NumDomainPoints = (uint32_t)SUPER::GetPointCount();
-
-            // Split the array of {u, v} points into separate U and V arrays
-            DOMAIN_POINT* pPoints = SUPER::GetPoints();
-            for (uint32_t i = 0; i < NumDomainPoints; i++)
-            {
-                DomainPointsU[i] = pPoints[i].u;
-                DomainPointsV[i] = pPoints[i].v;
-            }
-            tsTessellatedData.NumDomainPoints = NumDomainPoints;
-            tsTessellatedData.pDomainPointsU  = &DomainPointsU[0];
-            tsTessellatedData.pDomainPointsV  = &DomainPointsV[0];
-
-            NumIndices = (uint32_t)SUPER::GetIndexCount();
-
-            assert(NumIndices % IndexDiv == 0);
-            tsTessellatedData.NumPrimitives = NumIndices / IndexDiv;
-
-            // De-interleave the index list: vertex k of primitive p goes to Indices[k][p]
-            uint32_t* pIndices = (uint32_t*)SUPER::GetIndices();
-            for (uint32_t i = 0; i < NumIndices; i++)
-            {
-                Indices[i % IndexDiv][i / IndexDiv] = pIndices[i];
-            }
-
-            tsTessellatedData.ppIndices[0] = &Indices[0][0];
-            tsTessellatedData.ppIndices[1] = &Indices[1][0];
-            tsTessellatedData.ppIndices[2] = &Indices[2][0];
-        }
-    };
-} // namespace Tessellator
-
-/// Construct and initialize a tessellation context inside caller-provided memory.
-/// memSize is always updated to the number of bytes a context requires; nullptr is
-/// returned when pContextMem is null or the buffer it describes is too small.
-INLINE HANDLE SWR_API
-       TSInitCtx(SWR_TS_DOMAIN          tsDomain,         ///< [IN] Tessellation domain (isoline, quad, triangle)
-                 SWR_TS_PARTITIONING    tsPartitioning,   ///< [IN] Tessellation partitioning algorithm
-                 SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology, ///< [IN] Tessellation output topology
-                 void*                  pContextMem,      ///< [IN] Memory to use for the context
-                 size_t&                memSize) ///< [INOUT] In: Amount of memory in pContextMem. Out: Mem required
-{
-    using Tessellator::SWR_TS;
-    SWR_ASSERT(tsDomain < SWR_TS_DOMAIN_COUNT);
-    SWR_ASSERT(tsPartitioning < SWR_TS_PARTITIONING_COUNT);
-    SWR_ASSERT(tsOutputTopology < SWR_TS_OUTPUT_TOPOLOGY_COUNT);
-
-    // Report the requirement before deciding whether the supplied buffer is usable
-    const size_t availableBytes = memSize;
-    memSize                     = AlignUp(sizeof(SWR_TS), 64);
-
-    const bool bufferUsable = (pContextMem != nullptr) && (memSize <= availableBytes);
-    if (!bufferUsable)
-    {
-        return nullptr;
-    }
-
-    HANDLE hContext = pContextMem;
-
-    // Placement-new: the object is built in the caller's buffer, nothing is allocated here
-    SWR_TS* pNewTessellator = new (hContext) SWR_TS();
-    SWR_ASSERT(pNewTessellator == hContext);
-
-    pNewTessellator->Init(tsDomain, tsPartitioning, tsOutputTopology);
-
-    return hContext;
-}
-
-/// Destroy a tessellation context. Only the SWR_TS destructor is run here; the
-/// backing memory is not released by this function. A null handle is ignored.
-INLINE void SWR_API TSDestroyCtx(HANDLE tsCtx) ///< [IN] Tessellation context to be destroyed
-{
-    using Tessellator::SWR_TS;
-    SWR_TS* pDoomed = (SWR_TS*)tsCtx;
-
-    if (!pDoomed)
-    {
-        return;
-    }
-
-    pDoomed->~SWR_TS();
-}
-
-/// Run the tessellator held in tsCtx with the given factors and hand the
-/// generated points/indices back through tsTessellatedData.
-INLINE void SWR_API
-       TSTessellate(HANDLE                          tsCtx,             ///< [IN] Tessellation Context
-                    const SWR_TESSELLATION_FACTORS& tsTessFactors,     ///< [IN] Tessellation Factors
-                    SWR_TS_TESSELLATED_DATA&        tsTessellatedData) ///< [OUT] Tessellated Data
-{
-    using Tessellator::SWR_TS;
-
-    // The handle is the SWR_TS object that TSInitCtx constructed
-    SWR_TS* const pContextTessellator = (SWR_TS*)tsCtx;
-    SWR_ASSERT(pContextTessellator);
-
-    pContextTessellator->Tessellate(tsTessFactors, tsTessellatedData);
-}
-
diff --git a/src/gallium/drivers/swr/rasterizer/core/tessellator.hpp b/src/gallium/drivers/swr/rasterizer/core/tessellator.hpp
deleted file mode 100644
index 459c1093d2e..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/tessellator.hpp
+++ /dev/null
@@ -1,471 +0,0 @@
-/*
- Copyright (c) Microsoft Corporation
-
- Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
- associated documentation files (the "Software"), to deal in the Software without restriction,
- including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
- and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
- subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
- NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#pragma once
-//=================================================================================================================================
-// Microsoft D3D11 Fixed Function Tessellator Reference - May 7, 2012
-// amar.patel@microsoft.com
-//
-// CHWTessellator demonstrates what is expected of hardware in the D3D11 fixed function Tessellator stage. Hardware
-// implementers need only look at this class.
-//
-// CHLSLTessellator is a wrapper for CHWTessellator, representing the effect of shader code that will
-// be autogenerated by HLSL in the Hull Shader, both for plumbing data around, and to precondition TessFactor values before they
-// are passed to the hardware (such as deriving inside TessFactors from edge TessFactors). The algorithms used
-// in CHLSLTessellator are subject to change, but since they represent shader code auto-generated by the HLSL compiler,
-// CHLSLTessellator has no effect on hardware design at all. Note the HLSL compiler will expose all the raw hardware
-// control illustrated by CHWTessellator for those who don't need the helper functionality illustrated by CHLSLTessellator.
-//
-// Usage: (1) Create either a CHLSLTessellator or CHWTessellator object, depending on which you want to verify.
-// (2) Call C*Tessellator::Init()
-// (3) Call C*Tessellator::Tessellate[IsoLine|Tri|Quad]Domain()
-// - Here you pass in TessFactors (how much to tessellate)
-// (4) Call C*Tessellator::GetPointCount(), C*Tessellator::GetIndexCount() to see how much data was generated.
-// (5) Call C*Tessellator::GetPoints() and C*Tessellator::GetIndices() to get pointers to the data.
-// The pointers are fixed for the lifetime of the object (storage for max tessellation),
-// so if you ::Tessellate again, the data in the buffers is overwritten.
-// (6) There are various other Get() methods to retrieve TessFactors that have been processed from
-// what you passed in at step 3. You can retrieve separate TessFactors that the tessellator
-// produced after clamping but before rounding, and also after rounding (say in pow2 mode).
-// These numbers can be useful information if you are geomorphing displacement maps.
-// (7) Goto Step 2 or 3 if you want to animate TessFactors or tessellate a different patch
-//
-// Code implementation details:
-//
-// There is lots of headroom to make this code run faster on CPUs. It was written merely as a reference for
-// what results hardware should produce, with CPU performance not a consideration. It is nice that this implementation
-// only generates the exact number of vertices needed (no duplicates) in the output vertex buffer. Also, the number
-// of calculations done for each U/V domain coordinate is minimized by doing some precalculation of some patch or edge
-// invariant numbers (see TESS_FACTOR_CONTEXT). All the vertex coordinate calculations could be computed with as much
-// parallelism as you like. Similarly the calculation of connectivity itself is highly parallelizable, and can also
-// be done independent of the vertex calculations.
-//
-//=================================================================================================================================
-
-#define D3D11_TESSELLATOR_MIN_ODD_TESSELLATION_FACTOR 1
-#define D3D11_TESSELLATOR_MAX_ODD_TESSELLATION_FACTOR 63
-#define D3D11_TESSELLATOR_MIN_EVEN_TESSELLATION_FACTOR 2
-#define D3D11_TESSELLATOR_MAX_EVEN_TESSELLATION_FACTOR 64
-
-#define D3D11_TESSELLATOR_MIN_ISOLINE_DENSITY_TESSELLATION_FACTOR 1
-#define D3D11_TESSELLATOR_MAX_ISOLINE_DENSITY_TESSELLATION_FACTOR 64
-
-#define D3D11_TESSELLATOR_MAX_TESSELLATION_FACTOR 64 // max of even and odd tessFactors
-
-#define MAX_POINT_COUNT ((D3D11_TESSELLATOR_MAX_TESSELLATION_FACTOR+1)*(D3D11_TESSELLATOR_MAX_TESSELLATION_FACTOR+1)) // (64+1)*(64+1) = 4225 points
-#define MAX_INDEX_COUNT (D3D11_TESSELLATOR_MAX_TESSELLATION_FACTOR*D3D11_TESSELLATOR_MAX_TESSELLATION_FACTOR*2*3) // 64*64*2*3 = 24576 indices
-
-//=================================================================================================================================
-// Data types for the caller
-//=================================================================================================================================
-enum D3D11_TESSELLATOR_PARTITIONING // partitioning mode passed to C*Tessellator::Init()
-{
- D3D11_TESSELLATOR_PARTITIONING_INTEGER,
- D3D11_TESSELLATOR_PARTITIONING_POW2, // CHWTessellator::HWIntegerPartitioning() reports true for this as well as INTEGER
- D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD,
- D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN
-};
-
-enum D3D11_TESSELLATOR_REDUCTION // passed to CHLSLTessellator::Init() as insideTessFactorReduction
-{
- D3D11_TESSELLATOR_REDUCTION_MIN,
- D3D11_TESSELLATOR_REDUCTION_MAX,
- D3D11_TESSELLATOR_REDUCTION_AVERAGE
-};
-
-enum D3D11_TESSELLATOR_QUAD_REDUCTION_AXIS // passed to CHLSLTessellator::Init() as quadInsideTessFactorReductionAxis
-{
- D3D11_TESSELLATOR_QUAD_REDUCTION_1_AXIS,
- D3D11_TESSELLATOR_QUAD_REDUCTION_2_AXIS
-};
-
-enum D3D11_TESSELLATOR_OUTPUT_PRIMITIVE // output topology passed to C*Tessellator::Init()
-{
- D3D11_TESSELLATOR_OUTPUT_POINT,
- D3D11_TESSELLATOR_OUTPUT_LINE,
- D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CW, // the SWR_TS wrapper selects this for SWR_TS_OUTPUT_TRI_CCW
- D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CCW, // the SWR_TS wrapper selects this for SWR_TS_OUTPUT_TRI_CW
-};
-
-typedef struct DOMAIN_POINT // one generated vertex; element type of the array returned by GetPoints()
-{
- float u;
- float v; // for tri, w = 1 - u - v;
-} DOMAIN_POINT;
-
-//=================================================================================================================================
-// CHWTessellator: D3D11 Tessellation Fixed Function Hardware Reference
-//=================================================================================================================================
-typedef unsigned int FXP; // fixed point number
-
-class CHWTessellator // reference tessellator: Init(), then Tessellate*Domain(), then read results with Get*()
-{
-
-//---------------------------------------------------------------------------------------------------------------------------------
-public:
- void Init( D3D11_TESSELLATOR_PARTITIONING partitioning,
- D3D11_TESSELLATOR_OUTPUT_PRIMITIVE outputPrimitive);
-
- void TessellateIsoLineDomain( float TessFactor_V_LineDensity,
- float TessFactor_U_LineDetail );
-
- void TessellateTriDomain( float TessFactor_Ueq0,
- float TessFactor_Veq0,
- float TessFactor_Weq0,
- float TessFactor_Inside );
-
- void TessellateQuadDomain( float TessFactor_Ueq0,
- float TessFactor_Veq0,
- float TessFactor_Ueq1,
- float TessFactor_Veq1,
- float TessFactor_InsideU,
- float TessFactor_InsideV );
-
- int GetPointCount(); // how many points the last Tessellate*Domain() call generated
- int GetIndexCount(); // how many indices the last Tessellate*Domain() call generated
-
- DOMAIN_POINT* GetPoints(); // Get CHWTessellator owned pointer to vertices (UV values).
- // Pointer is fixed for lifetime of CHWTessellator object.
- int* GetIndices(); // Get CHWTessellator owned pointer to vertex indices.
- // Pointer is fixed for lifetime of CHWTessellator object.
-
-#define ALLOW_XBOX_360_COMPARISON // Different vertex splitting order. This is NOT D3D11 behavior, just available here for comparison.
- // Setting this define true just allows the XBox split style to be enabled via
- // SetXBox360Mode() below, but by default this XBox360 mode still always starts off DISABLED.
- // The XBox360 always splits from the center of an edge (D3D11 uses ruler function). Splitting
- // from the center causes sliver triangles in transition areas, which cause numerous problems.
- // Note the XBox360 only supports adaptive tessellation via fractional_even partitioning,
- // though this #define lets you try the XBox vertex splitting order with any of the
- // partitioning modes: even, odd, integer or pow2.
-#ifdef ALLOW_XBOX_360_COMPARISON
- void SetXBox360Mode(bool bXboxMode) {m_bXBox360Mode = bXboxMode;}
-#endif
- CHWTessellator();
- ~CHWTessellator();
-//---------------------------------------------------------------------------------------------------------------------------------
- //=============================================================================================================================
- // Some defines so that numbers are usually self commenting
- //=============================================================================================================================
- static const int U = 0; // points on a tri patch
- static const int V = 1;
- static const int W = 2;
- static const int Ueq0 = 0; // edges on a tri patch
- static const int Veq0 = 1;
- static const int Weq0 = 2;
-
- static const int Ueq1 = 2; // edges on a quad patch: Ueq0, Veq0, Ueq1, Veq1
- static const int Veq1 = 3;
-
- static const int QUAD_AXES = 2;
- static const int QUAD_EDGES = 4;
- static const int TRI_EDGES = 3;
- //=============================================================================================================================
-
- enum TESSELLATOR_PARITY // derived from D3D11_TESSELLATOR_PARTITIONING
- { // (note: for integer tessellation, both parities are used)
- TESSELLATOR_PARITY_EVEN,
- TESSELLATOR_PARITY_ODD
- };
-private:
- TESSELLATOR_PARITY m_originalParity; // user chosen parity
- TESSELLATOR_PARITY m_parity; // current parity: if allowing mix of even/odd during discrete
- // tessellation, this can vary from the user defined parity
- D3D11_TESSELLATOR_PARTITIONING m_originalPartitioning; // user chosen partitioning
- D3D11_TESSELLATOR_PARTITIONING m_partitioning; // current partitioning. IsoLines overrides for line density
- D3D11_TESSELLATOR_OUTPUT_PRIMITIVE m_outputPrimitive;
- DOMAIN_POINT* m_Point; // array where we will store u/v's for the points we generate
- int* m_Index; // array where we will store index topology
- int m_NumPoints;
- int m_NumIndices;
-#ifdef ALLOW_XBOX_360_COMPARISON
- bool m_bXBox360Mode;
-#endif
- // PlacePointIn1D below is the workhorse for all position placement.
- // It is code that could run as preamble in a Domain Shader, so the tessellator itself
- // doesn't necessarily need to have floating point.
- // Some per-TessFactor fixed context is needed, and that can be computed wherever
- // the TessFactor reduction is done, perhaps as Hull Shader postamble - this is shared
- // for all point evaluation.
- typedef struct TESS_FACTOR_CONTEXT
- {
- FXP fxpInvNumSegmentsOnFloorTessFactor;
- FXP fxpInvNumSegmentsOnCeilTessFactor;
- FXP fxpHalfTessFactorFraction;
- int numHalfTessFactorPoints;
- int splitPointOnFloorHalfTessFactor;
- } TESS_FACTOR_CONTEXT;
- void ComputeTessFactorContext( FXP fxpTessFactor, TESS_FACTOR_CONTEXT& TessFactorCtx );
- void PlacePointIn1D( const TESS_FACTOR_CONTEXT& TessFactorCtx, int point, FXP& fxpLocation );
-
- int NumPointsForTessFactor(FXP fxpTessFactor);
-
- // Tessellation parity control
- bool Odd() {return (m_parity == TESSELLATOR_PARITY_ODD) ? true : false;}
- void SetTessellationParity(TESSELLATOR_PARITY parity) {m_parity = parity;}
-
- // HWIntegerPartitioning() - hardware doesn't care about what pow2 partitioning is - the query below is true for
- // both integer and pow2.
- bool HWIntegerPartitioning() {return ((m_partitioning == D3D11_TESSELLATOR_PARTITIONING_INTEGER)||
- (m_partitioning == D3D11_TESSELLATOR_PARTITIONING_POW2)) ? true : false;}
-
- // Tessellation Partitioning control
- void RestorePartitioning() {m_partitioning = m_originalPartitioning;};
- void OverridePartitioning(D3D11_TESSELLATOR_PARTITIONING partitioning) {m_partitioning = partitioning;} //isoline uses this for density
-
- // Call these to generate new points and indices. Max TessFactor storage is already allocated.
- int DefinePoint(FXP u, FXP v, int pointStorageOffset);
- void DefineIndex(int index, int indexStorageOffset);
- void DefineClockwiseTriangle(int index0, int index1, int index2, int indexStorageBaseOffset);
-
- // Couple of trivial ways to generate index data just given points and no other connectivity.
- void DumpAllPoints(); // Make point indices for point rendering mode -
- // redundant, but just here for orthogonality.
- void DumpAllPointsAsInOrderLineList(); // A debug visualization of all the points connected
- // in the order they were generated.
- // Asking to draw line topology on a tri or quad patch will do this
-
-
- // The structures below define the data that is derived given input TessFactors and which
- // is used by point generation and connectivity generation steps (each of which are independent)
- typedef struct PROCESSED_TESS_FACTORS_ISOLINE
- {
- TESSELLATOR_PARITY lineDensityParity;
- TESSELLATOR_PARITY lineDetailParity;
- TESS_FACTOR_CONTEXT lineDensityTessFactorCtx;
- TESS_FACTOR_CONTEXT lineDetailTessFactorCtx;
- bool bPatchCulled;
- int numPointsPerLine;
- int numLines;
- } PROCESSED_TESS_FACTORS_ISOLINE;
- typedef struct PROCESSED_TESS_FACTORS_TRI
- {
- FXP outsideTessFactor[TRI_EDGES];
- FXP insideTessFactor;
- TESSELLATOR_PARITY outsideTessFactorParity[TRI_EDGES];
- TESSELLATOR_PARITY insideTessFactorParity;
- TESS_FACTOR_CONTEXT outsideTessFactorCtx[TRI_EDGES];
- TESS_FACTOR_CONTEXT insideTessFactorCtx;
- bool bJustDoMinimumTessFactor;
- bool bPatchCulled;
- // Stuff below is just specific to the traversal order
- // this code happens to use to generate points/lines
- int numPointsForOutsideEdge[TRI_EDGES];
- int numPointsForInsideTessFactor;
- int insideEdgePointBaseOffset;
- } PROCESSED_TESS_FACTORS_TRI;
- typedef struct PROCESSED_TESS_FACTORS_QUAD
- {
- FXP outsideTessFactor[QUAD_EDGES];
- FXP insideTessFactor[QUAD_AXES];
- TESSELLATOR_PARITY outsideTessFactorParity[QUAD_EDGES];
- TESSELLATOR_PARITY insideTessFactorParity[QUAD_AXES];
- TESS_FACTOR_CONTEXT outsideTessFactorCtx[QUAD_EDGES];
- TESS_FACTOR_CONTEXT insideTessFactorCtx[QUAD_AXES];
- bool bJustDoMinimumTessFactor;
- bool bPatchCulled;
- // Stuff below is just specific to the traversal order
- // this code happens to use to generate points/lines
- int numPointsForOutsideEdge[QUAD_EDGES];
- int numPointsForInsideTessFactor[QUAD_AXES];
- int insideEdgePointBaseOffset;
- } PROCESSED_TESS_FACTORS_QUAD;
-
- // These are the workhorse functions for tessellation:
- // (1) Process input TessFactors
- // (2) Generate points
- // (3) Generate connectivity (can be done in parallel to (2))
- void IsoLineProcessTessFactors( float TessFactor_V_LineDensity, float TessFactor_U_LineDetail, PROCESSED_TESS_FACTORS_ISOLINE& processedTessFactors );
- void IsoLineGeneratePoints( const PROCESSED_TESS_FACTORS_ISOLINE& processedTessFactors );
- void IsoLineGenerateConnectivity( const PROCESSED_TESS_FACTORS_ISOLINE& processedTessFactors );
- void TriProcessTessFactors( float tessFactor_Ueq0, float TessFactor_Veq0, float TessFactor_Weq0, float insideTessFactor, PROCESSED_TESS_FACTORS_TRI& processedTessFactors );
- void TriGeneratePoints( const PROCESSED_TESS_FACTORS_TRI& processedTessFactors );
- void TriGenerateConnectivity( const PROCESSED_TESS_FACTORS_TRI& processedTessFactors );
- void QuadProcessTessFactors( float tessFactor_Ueq0, float tessFactor_Veq0, float tessFactor_Ueq1, float tessFactor_Veq1,
- float insideTessFactor_U, float insideTessFactor_V, PROCESSED_TESS_FACTORS_QUAD& processedTessFactors );
- void QuadGeneratePoints( const PROCESSED_TESS_FACTORS_QUAD& processedTessFactors );
- void QuadGenerateConnectivity( const PROCESSED_TESS_FACTORS_QUAD& processedTessFactors );
-
- // Stitching
- // ---------
- // Given pointers to the beginning of 2 parallel rows of points, and TessFactors for each, stitch them.
- // The assumption is the stitch is symmetric.
- void StitchTransition(int baseIndexOffset, int insideEdgePointBaseOffset, int insideNumHalfTessFactorPoints,
- TESSELLATOR_PARITY insideEdgeTessFactorParity,
- int outsideEdgePointBaseOffset, int outsideNumHalfTessFactorPoints,
- TESSELLATOR_PARITY outsideEdgeTessFactorParity );
- // The interior can just use a simpler stitch.
- enum DIAGONALS
- {
- DIAGONALS_INSIDE_TO_OUTSIDE,
- DIAGONALS_INSIDE_TO_OUTSIDE_EXCEPT_MIDDLE,
- DIAGONALS_MIRRORED
- };
-
- void StitchRegular(bool bTrapezoid, DIAGONALS diagonals, int baseIndexOffset, int numInsideEdgePoints,
- int insideEdgePointBaseOffset, int outsideEdgePointBaseOffset);
-
-//---------------------------------------------------------------------------------------------------------------------------------
- // Index Patching
- // --------------
- // The code below patches index values produced during triangulation, so triangulation doesn't have to know
- // where points should go. I happened to never produce duplicate vertices, but the patching would
- // be simpler if some duplicate vertices were introduced in practice. During point rendering mode however,
- // it is not permitted for duplicate points to show up.
-
- // Since the points are generated in concentric rings, most of the time, the point locations are
- // sequentially increasing in memory for each side of a ring, which the stitch can take advantage of.
- // However, there are exceptions where the points are not sequentially increasing, such as
- // the 4th row in a given ring, where the last point on the outside of each row is actually the beginning
- // point.
- // So we let the stitching code think it sees sequential vertices, and when it emits a vertex index,
- // we patch it to be the real location.
- int PatchIndexValue(int index);
- typedef struct INDEX_PATCH_CONTEXT
- {
- int insidePointIndexDeltaToRealValue;
- int insidePointIndexBadValue;
- int insidePointIndexReplacementValue;
- int outsidePointIndexPatchBase;
- int outsidePointIndexDeltaToRealValue;
- int outsidePointIndexBadValue;
- int outsidePointIndexReplacementValue;
- } INDEX_PATCH_CONTEXT;
- void SetUsingPatchedIndices(bool bUsingPatchedIndices) {m_bUsingPatchedIndices = bUsingPatchedIndices;}
-
- // A second index patch we have to do handles the leftover strip of quads in the middle of an odd quad patch after
- // finishing all the concentric rings.
- // This also handles the leftover strip of points in the middle of an even quad
- // patch, when stitching the row of triangles up the left side (V major quad) or bottom (U major quad) of the
- // inner ring
- typedef struct INDEX_PATCH_CONTEXT2
- {
- int baseIndexToInvert;
- int indexInversionEndPoint;
- int cornerCaseBadValue;
- int cornerCaseReplacementValue;
- } INDEX_PATCH_CONTEXT2;
- void SetUsingPatchedIndices2(bool bUsingPatchedIndices) {m_bUsingPatchedIndices2 = bUsingPatchedIndices;}
- bool m_bUsingPatchedIndices; // written by SetUsingPatchedIndices()
- bool m_bUsingPatchedIndices2; // written by SetUsingPatchedIndices2()
- INDEX_PATCH_CONTEXT m_IndexPatchContext;
- INDEX_PATCH_CONTEXT2 m_IndexPatchContext2;
-
-};
-
-//=================================================================================================================================
-// CHLSLTessellator: D3D11 Tessellation HLSL Tessellator Interface
-// Demonstrates TessFactor preconditioning code auto-generated by HLSL. Subject to change, but this
-// just represents the effect of shader code the HLSL compiler will generate in the Hull Shader,
-// so it does not affect hardware design at all.
-//=================================================================================================================================
-class CHLSLTessellator : public CHWTessellator // preconditions TessFactors, then forwards point/index queries to CHWTessellator
-{
-public:
- void Init( D3D11_TESSELLATOR_PARTITIONING partitioning,
- D3D11_TESSELLATOR_REDUCTION insideTessFactorReduction,
- D3D11_TESSELLATOR_QUAD_REDUCTION_AXIS quadInsideTessFactorReductionAxis,
- D3D11_TESSELLATOR_OUTPUT_PRIMITIVE outputPrimitive);
-
- void TessellateIsoLineDomain( float TessFactor_V_LineDensity,
- float TessFactor_U_LineDetail );
-
- void TessellateTriDomain( float tessFactor_Ueq0,
- float TessFactor_Veq0,
- float TessFactor_Weq0,
- float insideTessFactorScale /*[0..1]*/ );
-
- void TessellateQuadDomain( float TessFactorUeq0,
- float TessFactorVeq0,
- float TessFactorUeq1,
- float TessFactorVeq1,
- float insideTessFactorScaleU /*[0..1]*/,
- float insideTessFactorScaleV /*[0..1]*/ );
-
- int GetPointCount() {return CHWTessellator::GetPointCount();};
- int GetIndexCount() {return CHWTessellator::GetIndexCount();}
-
- DOMAIN_POINT* GetPoints() {return CHWTessellator::GetPoints();} // Get CHLSLTessellator owned pointer to vertices (UV values).
- // Pointer is fixed for lifetime of CHLSLTessellator object.
- int* GetIndices() {return CHWTessellator::GetIndices();} // Get CHLSLTessellator owned pointer to vertex indices.
- // Pointer is fixed for lifetime of CHLSLTessellator object.
-
- // Retrieve TessFactors actually used by the "hardware"
- // This includes clamping to valid range, and more interestingly
- // if integer or pow2 partitioning is being done, the rounded TessFactors can be retrieved.
- // Getting the rounded TessFactors can be useful for geomorphing of displacement maps.
- // Note: the isoline, tri and quad getters below read the same array slots.
- float GetIsoLineDensityTessFactor() {return m_LastComputedTessFactors[0];}
- float GetIsoLineDetailTessFactor() {return m_LastComputedTessFactors[1];}
- float GetTriUeq0TessFactor() {return m_LastComputedTessFactors[0];}
- float GetTriVeq0TessFactor() {return m_LastComputedTessFactors[1];}
- float GetTriWeq0TessFactor() {return m_LastComputedTessFactors[2];}
- float GetTriInsideTessFactor() {return m_LastComputedTessFactors[3];}
- float GetQuadUeq0TessFactor() {return m_LastComputedTessFactors[0];}
- float GetQuadVeq0TessFactor() {return m_LastComputedTessFactors[1];}
- float GetQuadUeq1TessFactor() {return m_LastComputedTessFactors[2];}
- float GetQuadVeq1TessFactor() {return m_LastComputedTessFactors[3];}
- float GetQuadInsideUTessFactor() {return m_LastComputedTessFactors[4];}
- float GetQuadInsideVTessFactor() {return m_LastComputedTessFactors[5];}
- float GetUnRoundedIsoLineDensityTessFactor() {return m_LastUnRoundedComputedTessFactors[0];}
- float GetUnRoundedIsoLineDetailTessFactor() {return m_LastUnRoundedComputedTessFactors[1];}
- float GetUnRoundedTriUeq0TessFactor() {return m_LastUnRoundedComputedTessFactors[0];}
- float GetUnRoundedTriVeq0TessFactor() {return m_LastUnRoundedComputedTessFactors[1];}
- float GetUnRoundedTriWeq0TessFactor() {return m_LastUnRoundedComputedTessFactors[2];}
- float GetUnRoundedTriInsideTessFactor() {return m_LastUnRoundedComputedTessFactors[3];}
- float GetUnRoundedQuadUeq0TessFactor() {return m_LastUnRoundedComputedTessFactors[0];}
- float GetUnRoundedQuadVeq0TessFactor() {return m_LastUnRoundedComputedTessFactors[1];}
- float GetUnRoundedQuadUeq1TessFactor() {return m_LastUnRoundedComputedTessFactors[2];}
- float GetUnRoundedQuadVeq1TessFactor() {return m_LastUnRoundedComputedTessFactors[3];}
- float GetUnRoundedQuadInsideUTessFactor() {return m_LastUnRoundedComputedTessFactors[4];}
- float GetUnRoundedQuadInsideVTessFactor() {return m_LastUnRoundedComputedTessFactors[5];}
-
- CHLSLTessellator();
-//---------------------------------------------------------------------------------------------------------------------------------
-private: // NOTE(review): the first five members repeat the names of CHWTessellator's private members; they are separate objects
- TESSELLATOR_PARITY m_originalParity; // user chosen parity
- TESSELLATOR_PARITY m_parity; // current parity: if allowing mix of even/odd during discrete
- // tessellation, this can vary from the user defined parity
- D3D11_TESSELLATOR_PARTITIONING m_originalPartitioning; // user chosen partitioning
- D3D11_TESSELLATOR_PARTITIONING m_partitioning; // current partitioning. IsoLines overrides for line density
- D3D11_TESSELLATOR_OUTPUT_PRIMITIVE m_outputPrimitive;
- D3D11_TESSELLATOR_REDUCTION m_insideTessFactorReduction;
- D3D11_TESSELLATOR_QUAD_REDUCTION_AXIS m_quadInsideTessFactorReductionAxis;
- float m_LastComputedTessFactors[6]; // TessFactors used for last tessellation
- float m_LastUnRoundedComputedTessFactors[6]; // TessFactors used for last tessellation (before they were rounded)
- bool IntegerPartitioning() {return (m_partitioning == D3D11_TESSELLATOR_PARTITIONING_INTEGER) ? true : false;}
- bool Pow2Partitioning() {return (m_partitioning == D3D11_TESSELLATOR_PARTITIONING_POW2)? true : false;}
- void ClampTessFactor(float& TessFactor);
- void RoundUpTessFactor(float& TessFactor);
- void CleanupFloatTessFactor(float& input); // clamp float to [1.0f... +INF] (incl NaN->1.0f)
- void ClampFloatTessFactorScale(float& input); // clamp float to [0.0f...1.0f] (incl NaN->0.0f)
-
- // Tessellation parity control
- bool Odd() {return (m_parity == TESSELLATOR_PARITY_ODD) ? true : false;}
- void SetTessellationParity(TESSELLATOR_PARITY parity) {m_parity = parity;}
-
- // Tessellation Partitioning control
- void RestorePartitioning() {m_partitioning = m_originalPartitioning;};
- void OverridePartitioning(D3D11_TESSELLATOR_PARTITIONING partitioning) {m_partitioning = partitioning;} //isoline uses this for density
-
- void IsoLineHLSLProcessTessFactors( float TessFactor_V_LineDensity, float TessFactor_U_LineDetail );
- void TriHLSLProcessTessFactors( float tessFactor_Ueq0, float TessFactor_Veq0, float TessFactor_Weq0, float insideTessFactor );
- void QuadHLSLProcessTessFactors( float TessFactor_Ueq0, float TessFactor_Veq0, float TessFactor_Ueq1, float TessFactor_Veq1,
- float insideTessFactor_U, float insideTessFactor_V );
-
-};
-
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
deleted file mode 100644
index 8d4104f0af1..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ /dev/null
@@ -1,1423 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ****************************************************************************/
-
-#include <stdio.h>
-#include <thread>
-#include <algorithm>
-#include <float.h>
-#include <vector>
-#include <utility>
-#include <fstream>
-#include <string>
-
-#if defined(__linux__) || defined(__gnu_linux__) || defined(__APPLE__)
-#include <pthread.h>
-#include <sched.h>
-#include <unistd.h>
-#endif
-
-#ifdef __APPLE__
-#include <sys/types.h>
-#include <sys/sysctl.h>
-#endif
-
-#include "common/os.h"
-#include "core/api.h"
-#include "context.h"
-#include "frontend.h"
-#include "backend.h"
-#include "rasterizer.h"
-#include "rdtsc_core.h"
-#include "tilemgr.h"
-#include "tileset.h"
-
-
-// ThreadId
-struct Core
-{
- uint32_t procGroup = 0;
- std::vector<uint32_t> threadIds;
-};
-
-struct NumaNode
-{
- uint32_t numaId;
- std::vector<Core> cores;
-};
-
-typedef std::vector<NumaNode> CPUNumaNodes;
-
-void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThreadsPerProcGroup)
-{
- out_nodes.clear();
- out_numThreadsPerProcGroup = 0;
-
-#if defined(_WIN32)
-
- std::vector<KAFFINITY> threadMaskPerProcGroup;
-
- static std::mutex m;
- std::lock_guard<std::mutex> l(m);
-
- DWORD bufSize = 0;
-
- BOOL ret = GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &bufSize);
- SWR_ASSERT(ret == FALSE && GetLastError() == ERROR_INSUFFICIENT_BUFFER);
-
- PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBufferMem =
- (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufSize);
- SWR_ASSERT(pBufferMem);
-
- ret = GetLogicalProcessorInformationEx(RelationProcessorCore, pBufferMem, &bufSize);
- SWR_ASSERT(ret != FALSE, "Failed to get Processor Topology Information");
-
- uint32_t count = bufSize / pBufferMem->Size;
- PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBuffer = pBufferMem;
-
- for (uint32_t i = 0; i < count; ++i)
- {
- SWR_ASSERT(pBuffer->Relationship == RelationProcessorCore);
- for (uint32_t g = 0; g < pBuffer->Processor.GroupCount; ++g)
- {
- auto& gmask = pBuffer->Processor.GroupMask[g];
- uint32_t threadId = 0;
- uint32_t procGroup = gmask.Group;
-
- Core* pCore = nullptr;
-
- while (BitScanForwardSizeT((unsigned long*)&threadId, gmask.Mask))
- {
- // clear mask
- KAFFINITY threadMask = KAFFINITY(1) << threadId;
- gmask.Mask &= ~threadMask;
-
- if (procGroup >= threadMaskPerProcGroup.size())
- {
- threadMaskPerProcGroup.resize(procGroup + 1);
- }
-
- if (threadMaskPerProcGroup[procGroup] & threadMask)
- {
- // Already seen this mask. This means that we are in 32-bit mode and
- // have seen more than 32 HW threads for this procGroup
- // Don't use it
-#if defined(_WIN64)
- SWR_INVALID("Shouldn't get here in 64-bit mode");
-#endif
- continue;
- }
-
- threadMaskPerProcGroup[procGroup] |= (KAFFINITY(1) << threadId);
-
- // Find Numa Node
- uint32_t numaId = 0;
- PROCESSOR_NUMBER procNum = {};
- procNum.Group = WORD(procGroup);
- procNum.Number = UCHAR(threadId);
-
- ret = GetNumaProcessorNodeEx(&procNum, (PUSHORT)&numaId);
- SWR_ASSERT(ret);
-
- // Store data
- if (out_nodes.size() <= numaId)
- {
- out_nodes.resize(numaId + 1);
- }
- auto& numaNode = out_nodes[numaId];
- numaNode.numaId = numaId;
-
- if (nullptr == pCore)
- {
- numaNode.cores.push_back(Core());
- pCore = &numaNode.cores.back();
- pCore->procGroup = procGroup;
- }
- pCore->threadIds.push_back(threadId);
- if (procGroup == 0)
- {
- out_numThreadsPerProcGroup++;
- }
- }
- }
- pBuffer = PtrAdd(pBuffer, pBuffer->Size);
- }
-
- free(pBufferMem);
-
-#elif defined(__linux__) || defined(__gnu_linux__)
-
- // Parse /proc/cpuinfo to get full topology
- std::ifstream input("/proc/cpuinfo");
- std::string line;
- char* c;
- uint32_t procId = uint32_t(-1);
- uint32_t coreId = uint32_t(-1);
- uint32_t physId = uint32_t(-1);
-
- while (std::getline(input, line))
- {
- if (line.find("processor") != std::string::npos)
- {
- auto data_start = line.find(": ") + 2;
- procId = std::strtoul(&line.c_str()[data_start], &c, 10);
- continue;
- }
- if (line.find("core id") != std::string::npos)
- {
- auto data_start = line.find(": ") + 2;
- coreId = std::strtoul(&line.c_str()[data_start], &c, 10);
- continue;
- }
- if (line.find("physical id") != std::string::npos)
- {
- auto data_start = line.find(": ") + 2;
- physId = std::strtoul(&line.c_str()[data_start], &c, 10);
- continue;
- }
- if (line.length() == 0)
- {
- if (physId + 1 > out_nodes.size())
- out_nodes.resize(physId + 1);
- auto& numaNode = out_nodes[physId];
- numaNode.numaId = physId;
-
- if (coreId + 1 > numaNode.cores.size())
- numaNode.cores.resize(coreId + 1);
- auto& core = numaNode.cores[coreId];
- core.procGroup = coreId;
- core.threadIds.push_back(procId);
- }
- }
-
- out_numThreadsPerProcGroup = 0;
- for (auto& node : out_nodes)
- {
- for (auto& core : node.cores)
- {
- out_numThreadsPerProcGroup += core.threadIds.size();
- }
- }
-
-#elif defined(__APPLE__)
-
- auto numProcessors = 0;
- auto numCores = 0;
- auto numPhysicalIds = 0;
-
- int value;
- size_t size = sizeof(value);
-
- int result = sysctlbyname("hw.packages", &value, &size, NULL, 0);
- SWR_ASSERT(result == 0);
- numPhysicalIds = value;
-
- result = sysctlbyname("hw.logicalcpu", &value, &size, NULL, 0);
- SWR_ASSERT(result == 0);
- numProcessors = value;
-
- result = sysctlbyname("hw.physicalcpu", &value, &size, NULL, 0);
- SWR_ASSERT(result == 0);
- numCores = value;
-
- out_nodes.resize(numPhysicalIds);
-
- for (auto physId = 0; physId < numPhysicalIds; ++physId)
- {
- auto& numaNode = out_nodes[physId];
- auto procId = 0;
-
- numaNode.cores.resize(numCores);
-
- while (procId < numProcessors)
- {
- for (auto coreId = 0; coreId < numaNode.cores.size(); ++coreId, ++procId)
- {
- auto& core = numaNode.cores[coreId];
-
- core.procGroup = coreId;
- core.threadIds.push_back(procId);
- }
- }
- }
-
- out_numThreadsPerProcGroup = 0;
-
- for (auto& node : out_nodes)
- {
- for (auto& core : node.cores)
- {
- out_numThreadsPerProcGroup += core.threadIds.size();
- }
- }
-
-#else
-
-#error Unsupported platform
-
-#endif
-
- // Prune empty cores and numa nodes
- for (auto node_it = out_nodes.begin(); node_it != out_nodes.end();)
- {
- // Erase empty cores (first)
- for (auto core_it = node_it->cores.begin(); core_it != node_it->cores.end();)
- {
- if (core_it->threadIds.size() == 0)
- {
- core_it = node_it->cores.erase(core_it);
- }
- else
- {
- ++core_it;
- }
- }
-
- // Erase empty numa nodes (second)
- if (node_it->cores.size() == 0)
- {
- node_it = out_nodes.erase(node_it);
- }
- else
- {
- ++node_it;
- }
- }
-}
-
-void bindThread(SWR_CONTEXT* pContext,
- uint32_t threadId,
- uint32_t procGroupId = 0,
- bool bindProcGroup = false)
-{
- // Only bind threads when MAX_WORKER_THREADS isn't set.
- if (pContext->threadInfo.SINGLE_THREADED ||
- (pContext->threadInfo.MAX_WORKER_THREADS && bindProcGroup == false))
- {
- return;
- }
-
-#if defined(_WIN32)
-
- GROUP_AFFINITY affinity = {};
- affinity.Group = procGroupId;
-
-#if !defined(_WIN64)
- if (threadId >= 32)
- {
- // Hopefully we don't get here. Logic in CreateThreadPool should prevent this.
- SWR_INVALID("Shouldn't get here");
-
- // In a 32-bit process on Windows it is impossible to bind
- // to logical processors 32-63 within a processor group.
- // In this case set the mask to 0 and let the system assign
- // the processor. Hopefully it will make smart choices.
- affinity.Mask = 0;
- }
- else
-#endif
- {
- // If MAX_WORKER_THREADS is set, only bind to the proc group,
- // Not the individual HW thread.
- if (!bindProcGroup && !pContext->threadInfo.MAX_WORKER_THREADS)
- {
- affinity.Mask = KAFFINITY(1) << threadId;
- }
- else
- {
- affinity.Mask = KAFFINITY(0);
- }
- }
-
- if (!SetThreadGroupAffinity(GetCurrentThread(), &affinity, nullptr))
- {
- SWR_INVALID("Failed to set Thread Affinity");
- }
-
-#elif defined(__linux__) || defined(__gnu_linux__)
-
- cpu_set_t cpuset;
- pthread_t thread = pthread_self();
- CPU_ZERO(&cpuset);
- CPU_SET(threadId, &cpuset);
-
- int err = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
- if (err != 0)
- {
- fprintf(stderr, "pthread_setaffinity_np failure for tid %u: %s\n", threadId, strerror(err));
- }
-
-#endif
-}
-
-INLINE
-uint32_t GetEnqueuedDraw(SWR_CONTEXT* pContext)
-{
- return pContext->dcRing.GetHead();
-}
-
-INLINE
-DRAW_CONTEXT* GetDC(SWR_CONTEXT* pContext, uint32_t drawId)
-{
- return &pContext->dcRing[(drawId - 1) % pContext->MAX_DRAWS_IN_FLIGHT];
-}
-
-INLINE
-bool IDComparesLess(uint32_t a, uint32_t b)
-{
- // Use signed delta to ensure that wrap-around to 0 is correctly handled.
- int32_t delta = int32_t(a - b);
- return (delta < 0);
-}
-
-// returns true if dependency not met
-INLINE
-bool CheckDependency(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t lastRetiredDraw)
-{
- return pDC->dependent && IDComparesLess(lastRetiredDraw, pDC->drawId - 1);
-}
-
-bool CheckDependencyFE(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t lastRetiredDraw)
-{
- return pDC->dependentFE && IDComparesLess(lastRetiredDraw, pDC->drawId - 1);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Update client stats.
-INLINE void UpdateClientStats(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
-{
- if ((pContext->pfnUpdateStats == nullptr) || (GetApiState(pDC).enableStatsBE == false))
- {
- return;
- }
-
- DRAW_DYNAMIC_STATE& dynState = pDC->dynState;
- OSALIGNLINE(SWR_STATS) stats{0};
-
- // Sum up stats across all workers before sending to client.
- for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
- {
- stats.DepthPassCount += dynState.pStats[i].DepthPassCount;
- stats.PsInvocations += dynState.pStats[i].PsInvocations;
- stats.CsInvocations += dynState.pStats[i].CsInvocations;
-
- }
-
-
- pContext->pfnUpdateStats(GetPrivateState(pDC), &stats);
-}
-
-INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
-{
- UpdateClientStats(pContext, workerId, pDC);
-
- if (pDC->retireCallback.pfnCallbackFunc)
- {
- pDC->retireCallback.pfnCallbackFunc(pDC->retireCallback.userData,
- pDC->retireCallback.userData2,
- pDC->retireCallback.userData3);
-
- // Callbacks to external code *could* change floating point control state
- // Reset our optimal flags
- SetOptimalVectorCSR();
- }
-}
-
-// inlined-only version
-INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
-{
- int32_t result = static_cast<int32_t>(InterlockedDecrement(&pDC->threadsDone));
- SWR_ASSERT(result >= 0);
-
- AR_FLUSH(pDC->drawId);
-
- if (result == 0)
- {
- ExecuteCallbacks(pContext, workerId, pDC);
-
-
- // Cleanup memory allocations
- pDC->pArena->Reset(true);
- if (!pDC->isCompute)
- {
- pDC->pTileMgr->initialize();
- }
- if (pDC->cleanupState)
- {
- pDC->pState->pArena->Reset(true);
- }
-
- _ReadWriteBarrier();
-
- pContext->dcRing.Dequeue(); // Remove from tail
- }
-
- return result;
-}
-
-// available to other translation modules
-int32_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
-{
- return CompleteDrawContextInl(pContext, 0, pDC);
-}
-
-INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext,
- uint32_t workerId,
- uint32_t& curDrawBE,
- uint32_t& drawEnqueued)
-{
- // increment our current draw id to the first incomplete draw
- drawEnqueued = GetEnqueuedDraw(pContext);
- while (IDComparesLess(curDrawBE, drawEnqueued))
- {
- DRAW_CONTEXT* pDC = &pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT];
-
- // If its not compute and FE is not done then break out of loop.
- if (!pDC->doneFE && !pDC->isCompute)
- break;
-
- bool isWorkComplete =
- pDC->isCompute ? pDC->pDispatch->isWorkComplete() : pDC->pTileMgr->isWorkComplete();
-
- if (isWorkComplete)
- {
- curDrawBE++;
- CompleteDrawContextInl(pContext, workerId, pDC);
- }
- else
- {
- break;
- }
- }
-
- // If there are no more incomplete draws then return false.
- return IDComparesLess(curDrawBE, drawEnqueued);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief If there is any BE work then go work on it.
-/// @param pContext - pointer to SWR context.
-/// @param workerId - The unique worker ID that is assigned to this thread.
-/// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker
-/// thread
-/// has its own curDrawBE counter and this ensures that each worker processes all
-/// the draws in order.
-/// @param lockedTiles - This is the set of tiles locked by other threads. Each thread maintains its
-/// own set and each time it fails to lock a macrotile, because its already
-/// locked, then it will add that tile to the lockedTiles set. As a worker
-/// begins to work on future draws the lockedTiles ensure that it doesn't work
-/// on tiles that may still have work pending in a previous draw. Additionally,
-/// the lockedTiles is heuristic that can steer a worker back to the same
-/// macrotile that it had been working on in a previous draw.
-/// @returns true if worker thread should shutdown
-bool WorkOnFifoBE(SWR_CONTEXT* pContext,
- uint32_t workerId,
- uint32_t& curDrawBE,
- TileSet& lockedTiles,
- uint32_t numaNode,
- uint32_t numaMask)
-{
- bool bShutdown = false;
-
- // Find the first incomplete draw that has pending work. If no such draw is found then
- // return. FindFirstIncompleteDraw is responsible for incrementing the curDrawBE.
- uint32_t drawEnqueued = 0;
- if (FindFirstIncompleteDraw(pContext, workerId, curDrawBE, drawEnqueued) == false)
- {
- return false;
- }
-
- uint32_t lastRetiredDraw =
- pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1;
-
- // Reset our history for locked tiles. We'll have to re-learn which tiles are locked.
- lockedTiles.clear();
-
- // Try to work on each draw in order of the available draws in flight.
- // 1. If we're on curDrawBE, we can work on any macrotile that is available.
- // 2. If we're trying to work on draws after curDrawBE, we are restricted to
- // working on those macrotiles that are known to be complete in the prior draw to
- // maintain order. The locked tiles provides the history to ensures this.
- for (uint32_t i = curDrawBE; IDComparesLess(i, drawEnqueued); ++i)
- {
- DRAW_CONTEXT* pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT];
-
- if (pDC->isCompute)
- return false; // We don't look at compute work.
-
- // First wait for FE to be finished with this draw. This keeps threading model simple
- // but if there are lots of bubbles between draws then serializing FE and BE may
- // need to be revisited.
- if (!pDC->doneFE)
- return false;
-
- // If this draw is dependent on a previous draw then we need to bail.
- if (CheckDependency(pContext, pDC, lastRetiredDraw))
- {
- return false;
- }
-
- // Grab the list of all dirty macrotiles. A tile is dirty if it has work queued to it.
- auto& macroTiles = pDC->pTileMgr->getDirtyTiles();
-
- for (auto tile : macroTiles)
- {
- uint32_t tileID = tile->mId;
-
- // Only work on tiles for this numa node
- uint32_t x, y;
- pDC->pTileMgr->getTileIndices(tileID, x, y);
- if (((x ^ y) & numaMask) != numaNode)
- {
- _mm_pause();
- continue;
- }
-
- if (!tile->getNumQueued())
- {
- _mm_pause();
- continue;
- }
-
- // can only work on this draw if it's not in use by other threads
- if (lockedTiles.get(tileID))
- {
- _mm_pause();
- continue;
- }
-
- if (tile->tryLock())
- {
- BE_WORK* pWork;
-
- RDTSC_BEGIN(pContext->pBucketMgr, WorkerFoundWork, pDC->drawId);
-
- uint32_t numWorkItems = tile->getNumQueued();
- SWR_ASSERT(numWorkItems);
-
- pWork = tile->peek();
- SWR_ASSERT(pWork);
- if (pWork->type == DRAW)
- {
- pContext->pHotTileMgr->InitializeHotTiles(pContext, pDC, workerId, tileID);
- }
- else if (pWork->type == SHUTDOWN)
- {
- bShutdown = true;
- }
-
- while ((pWork = tile->peek()) != nullptr)
- {
- pWork->pfnWork(pDC, workerId, tileID, &pWork->desc);
- tile->dequeue();
- }
- RDTSC_END(pContext->pBucketMgr, WorkerFoundWork, numWorkItems);
-
- _ReadWriteBarrier();
-
- pDC->pTileMgr->markTileComplete(tileID);
-
- // Optimization: If the draw is complete and we're the last one to have worked on it
- // then we can reset the locked list as we know that all previous draws before the
- // next are guaranteed to be complete.
- if ((curDrawBE == i) && (bShutdown || pDC->pTileMgr->isWorkComplete()))
- {
- // We can increment the current BE and safely move to next draw since we know
- // this draw is complete.
- curDrawBE++;
- CompleteDrawContextInl(pContext, workerId, pDC);
-
- lastRetiredDraw++;
-
- lockedTiles.clear();
- break;
- }
-
- if (bShutdown)
- {
- break;
- }
- }
- else
- {
- // This tile is already locked. So let's add it to our locked tiles set. This way we
- // don't try locking this one again.
- lockedTiles.set(tileID);
- _mm_pause();
- }
- }
- }
-
- return bShutdown;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Called when FE work is complete for this DC.
-INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
-{
- if (pContext->pfnUpdateStatsFE && GetApiState(pDC).enableStatsFE)
- {
- SWR_STATS_FE& stats = pDC->dynState.statsFE;
-
- AR_EVENT(FrontendStatsEvent(pDC->drawId,
- stats.IaVertices,
- stats.IaPrimitives,
- stats.VsInvocations,
- stats.HsInvocations,
- stats.DsInvocations,
- stats.GsInvocations,
- stats.GsPrimitives,
- stats.CInvocations,
- stats.CPrimitives,
- stats.SoPrimStorageNeeded[0],
- stats.SoPrimStorageNeeded[1],
- stats.SoPrimStorageNeeded[2],
- stats.SoPrimStorageNeeded[3],
- stats.SoNumPrimsWritten[0],
- stats.SoNumPrimsWritten[1],
- stats.SoNumPrimsWritten[2],
- stats.SoNumPrimsWritten[3]));
- AR_EVENT(FrontendDrawEndEvent(pDC->drawId));
-
- pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &stats);
- }
-
- if (pContext->pfnUpdateSoWriteOffset)
- {
- for (uint32_t i = 0; i < MAX_SO_BUFFERS; ++i)
- {
- if ((pDC->dynState.SoWriteOffsetDirty[i]) &&
- (pDC->pState->state.soBuffer[i].soWriteEnable))
- {
- pContext->pfnUpdateSoWriteOffset(
- GetPrivateState(pDC), i, pDC->dynState.SoWriteOffset[i]);
- }
- }
- }
-
- if (pContext->pfnUpdateStreamOut)
- pContext->pfnUpdateStreamOut(GetPrivateState(pDC), pDC->dynState.soPrims);
-
- // Ensure all streaming writes are globally visible before marking this FE done
- _mm_mfence();
- pDC->doneFE = true;
-
- InterlockedDecrement(&pContext->drawsOutstandingFE);
-}
-
-void WorkOnFifoFE(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawFE)
-{
- // Try to grab the next DC from the ring
- uint32_t drawEnqueued = GetEnqueuedDraw(pContext);
- while (IDComparesLess(curDrawFE, drawEnqueued))
- {
- uint32_t dcSlot = curDrawFE % pContext->MAX_DRAWS_IN_FLIGHT;
- DRAW_CONTEXT* pDC = &pContext->dcRing[dcSlot];
- if (pDC->isCompute || pDC->doneFE)
- {
- CompleteDrawContextInl(pContext, workerId, pDC);
- curDrawFE++;
- }
- else
- {
- break;
- }
- }
-
- uint32_t lastRetiredFE = curDrawFE - 1;
- uint32_t curDraw = curDrawFE;
- while (IDComparesLess(curDraw, drawEnqueued))
- {
- uint32_t dcSlot = curDraw % pContext->MAX_DRAWS_IN_FLIGHT;
- DRAW_CONTEXT* pDC = &pContext->dcRing[dcSlot];
-
- if (!pDC->FeLock && !pDC->isCompute)
- {
- if (CheckDependencyFE(pContext, pDC, lastRetiredFE))
- {
- return;
- }
-
- uint32_t initial = InterlockedCompareExchange((volatile uint32_t*)&pDC->FeLock, 1, 0);
- if (initial == 0)
- {
- // successfully grabbed the DC, now run the FE
- pDC->FeWork.pfnWork(pContext, pDC, workerId, &pDC->FeWork.desc);
-
- CompleteDrawFE(pContext, workerId, pDC);
- }
- else
- {
- _mm_pause();
- }
- }
- else
- {
- _mm_pause();
- }
-
- curDraw++;
- }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief If there is any compute work then go work on it.
-/// @param pContext - pointer to SWR context.
-/// @param workerId - The unique worker ID that is assigned to this thread.
-/// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker
-/// thread
-/// has its own curDrawBE counter and this ensures that each worker processes all
-/// the draws in order.
-void WorkOnCompute(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawBE)
-{
- uint32_t drawEnqueued = 0;
- if (FindFirstIncompleteDraw(pContext, workerId, curDrawBE, drawEnqueued) == false)
- {
- return;
- }
-
- uint32_t lastRetiredDraw =
- pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1;
-
- for (uint64_t i = curDrawBE; IDComparesLess(i, drawEnqueued); ++i)
- {
- DRAW_CONTEXT* pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT];
- if (pDC->isCompute == false)
- return;
-
- // check dependencies
- if (CheckDependency(pContext, pDC, lastRetiredDraw))
- {
- return;
- }
-
- SWR_ASSERT(pDC->pDispatch != nullptr);
- DispatchQueue& queue = *pDC->pDispatch;
-
- // Is there any work remaining?
- if (queue.getNumQueued() > 0)
- {
- void* pSpillFillBuffer = nullptr;
- void* pScratchSpace = nullptr;
- uint32_t threadGroupId = 0;
- while (queue.getWork(threadGroupId))
- {
- queue.dispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
- queue.finishedWork();
- }
-
- // Ensure all streaming writes are globally visible before moving onto the next draw
- _mm_mfence();
- }
- }
-}
-
-void BindApiThread(SWR_CONTEXT* pContext, uint32_t apiThreadId)
-{
- if (nullptr == pContext)
- {
- return;
- }
-
- if (apiThreadId >= pContext->threadPool.numReservedThreads)
- {
- if (pContext->threadPool.numReservedThreads)
- {
- const THREAD_DATA& threadData = pContext->threadPool.pApiThreadData[0];
- // Just bind to the process group used for API thread 0
- bindThread(pContext, 0, threadData.procGroupId, true);
- }
- return;
- }
-
- const THREAD_DATA& threadData = pContext->threadPool.pApiThreadData[apiThreadId];
-
- bindThread(
- pContext, threadData.threadId, threadData.procGroupId, threadData.forceBindProcGroup);
-}
-
-template <bool IsFEThread, bool IsBEThread>
-DWORD workerThreadMain(LPVOID pData)
-{
- THREAD_DATA* pThreadData = (THREAD_DATA*)pData;
- SWR_CONTEXT* pContext = pThreadData->pContext;
- uint32_t threadId = pThreadData->threadId;
- uint32_t workerId = pThreadData->workerId;
-
- bindThread(pContext, threadId, pThreadData->procGroupId, pThreadData->forceBindProcGroup);
-
- {
- char threadName[64];
- sprintf_s(threadName,
-#if defined(_WIN32)
- "SWRWorker_%02d_NUMA%d_Core%02d_T%d",
-#else
- // linux pthread name limited to 16 chars (including \0)
- "w%03d-n%d-c%03d-t%d",
-#endif
- workerId,
- pThreadData->numaId,
- pThreadData->coreId,
- pThreadData->htId);
- SetCurrentThreadName(threadName);
- }
-
- RDTSC_INIT(pContext->pBucketMgr, threadId);
-
- // Only need offset numa index from base for correct masking
- uint32_t numaNode = pThreadData->numaId - pContext->threadInfo.BASE_NUMA_NODE;
- uint32_t numaMask = pContext->threadPool.numaMask;
-
- SetOptimalVectorCSR();
-
- // Track tiles locked by other threads. If we try to lock a macrotile and find its already
- // locked then we'll add it to this list so that we don't try and lock it again.
- TileSet lockedTiles;
-
- // each worker has the ability to work on any of the queued draws as long as certain
- // conditions are met. the data associated
- // with a draw is guaranteed to be active as long as a worker hasn't signaled that he
- // has moved on to the next draw when he determines there is no more work to do. The api
- // thread will not increment the head of the dc ring until all workers have moved past the
- // current head.
- // the logic to determine what to work on is:
- // 1- try to work on the FE any draw that is queued. For now there are no dependencies
- // on the FE work, so any worker can grab any FE and process in parallel. Eventually
- // we'll need dependency tracking to force serialization on FEs. The worker will try
- // to pick an FE by atomically incrementing a counter in the swr context. he'll keep
- // trying until he reaches the tail.
- // 2- BE work must be done in strict order. we accomplish this today by pulling work off
- // the oldest draw (ie the head) of the dcRing. the worker can determine if there is
- // any work left by comparing the total # of binned work items and the total # of completed
- // work items. If they are equal, then there is no more work to do for this draw, and
- // the worker can safely increment its oldestDraw counter and move on to the next draw.
- std::unique_lock<std::mutex> lock(pContext->WaitLock, std::defer_lock);
-
- auto threadHasWork = [&](uint32_t curDraw) { return curDraw != pContext->dcRing.GetHead(); };
-
- uint32_t curDrawBE = 0;
- uint32_t curDrawFE = 0;
-
- bool bShutdown = false;
-
- while (true)
- {
- if (bShutdown && !threadHasWork(curDrawBE))
- {
- break;
- }
-
- uint32_t loop = 0;
- while (loop++ < KNOB_WORKER_SPIN_LOOP_COUNT && !threadHasWork(curDrawBE))
- {
- _mm_pause();
- }
-
- if (!threadHasWork(curDrawBE))
- {
- lock.lock();
-
- // check for thread idle condition again under lock
- if (threadHasWork(curDrawBE))
- {
- lock.unlock();
- continue;
- }
-
- pContext->FifosNotEmpty.wait(lock);
- lock.unlock();
- }
-
- if (IsBEThread)
- {
- RDTSC_BEGIN(pContext->pBucketMgr, WorkerWorkOnFifoBE, 0);
- bShutdown |=
- WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
- RDTSC_END(pContext->pBucketMgr, WorkerWorkOnFifoBE, 0);
-
- WorkOnCompute(pContext, workerId, curDrawBE);
- }
-
- if (IsFEThread)
- {
- WorkOnFifoFE(pContext, workerId, curDrawFE);
-
- if (!IsBEThread)
- {
- curDrawBE = curDrawFE;
- }
- }
- }
-
- return 0;
-}
-template <>
-DWORD workerThreadMain<false, false>(LPVOID) = delete;
-
-template <bool IsFEThread, bool IsBEThread>
-DWORD workerThreadInit(LPVOID pData)
-{
-#if defined(_MSC_VER)
- __try
-#endif // _WIN32
- {
- return workerThreadMain<IsFEThread, IsBEThread>(pData);
- }
-
-#if defined(_MSC_VER)
- __except (EXCEPTION_CONTINUE_SEARCH)
- {
- }
-
-#endif // _WIN32
-
- return 1;
-}
-template <>
-DWORD workerThreadInit<false, false>(LPVOID pData) = delete;
-
-static void InitPerThreadStats(SWR_CONTEXT* pContext, uint32_t numThreads)
-{
- // Initialize DRAW_CONTEXT's per-thread stats
- for (uint32_t dc = 0; dc < pContext->MAX_DRAWS_IN_FLIGHT; ++dc)
- {
- pContext->dcRing[dc].dynState.pStats =
- (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * numThreads, 64);
- memset(pContext->dcRing[dc].dynState.pStats, 0, sizeof(SWR_STATS) * numThreads);
- }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Creates thread pool info but doesn't launch threads.
-/// @param pContext - pointer to context
-/// @param pPool - pointer to thread pool object.
-void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
-{
- CPUNumaNodes nodes;
- uint32_t numThreadsPerProcGroup = 0;
- CalculateProcessorTopology(nodes, numThreadsPerProcGroup);
- assert(numThreadsPerProcGroup > 0);
-
- // Assumption, for asymmetric topologies, multi-threaded cores will appear
- // in the list before single-threaded cores. This appears to be true for
- // Windows when the total HW threads is limited to 64.
- uint32_t numHWNodes = (uint32_t)nodes.size();
- uint32_t numHWCoresPerNode = (uint32_t)nodes[0].cores.size();
- uint32_t numHWHyperThreads = (uint32_t)nodes[0].cores[0].threadIds.size();
-
-#if defined(_WIN32) && !defined(_WIN64)
- if (!pContext->threadInfo.MAX_WORKER_THREADS)
- {
- // Limit 32-bit windows to bindable HW threads only
- if ((numHWCoresPerNode * numHWHyperThreads) > 32)
- {
- numHWCoresPerNode = 32 / numHWHyperThreads;
- }
- }
-#endif
-
- // Calculate num HW threads. Due to asymmetric topologies, this is not
- // a trivial multiplication.
- uint32_t numHWThreads = 0;
- for (auto const& node : nodes)
- {
- for (auto const& core : node.cores)
- {
- numHWThreads += (uint32_t)core.threadIds.size();
- }
- }
-
- uint32_t numNodes = numHWNodes;
- uint32_t numCoresPerNode = numHWCoresPerNode;
- uint32_t numHyperThreads = numHWHyperThreads;
-
- // Calc used threads per-core
- if (numHyperThreads > pContext->threadInfo.BASE_THREAD)
- {
- numHyperThreads -= pContext->threadInfo.BASE_THREAD;
- }
- else
- {
- SWR_ASSERT(false,
- "Cannot use BASE_THREAD value: %d, maxThreads: %d, reverting BASE_THREAD to 0",
- pContext->threadInfo.BASE_THREAD,
- numHyperThreads);
- pContext->threadInfo.BASE_THREAD = 0;
- }
-
- if (pContext->threadInfo.MAX_THREADS_PER_CORE)
- {
- numHyperThreads = std::min(numHyperThreads, pContext->threadInfo.MAX_THREADS_PER_CORE);
- }
-
- // Prune any cores that don't support the number of threads
- if (numHyperThreads > 1)
- {
- for (auto& node : nodes)
- {
- uint32_t numUsableCores = 0;
- for (auto& core : node.cores)
- {
- numUsableCores += (core.threadIds.size() >= numHyperThreads);
- }
- numCoresPerNode = std::min(numCoresPerNode, numUsableCores);
- }
- }
-
- // Calc used cores per NUMA node
- if (numCoresPerNode > pContext->threadInfo.BASE_CORE)
- {
- numCoresPerNode -= pContext->threadInfo.BASE_CORE;
- }
- else
- {
- SWR_ASSERT(false,
- "Cannot use BASE_CORE value: %d, maxCores: %d, reverting BASE_CORE to 0",
- pContext->threadInfo.BASE_CORE,
- numCoresPerNode);
- pContext->threadInfo.BASE_CORE = 0;
- }
-
- if (pContext->threadInfo.MAX_CORES_PER_NUMA_NODE)
- {
- numCoresPerNode = std::min(numCoresPerNode, pContext->threadInfo.MAX_CORES_PER_NUMA_NODE);
- }
-
- // Calc used NUMA nodes
- if (numNodes > pContext->threadInfo.BASE_NUMA_NODE)
- {
- numNodes -= pContext->threadInfo.BASE_NUMA_NODE;
- }
- else
- {
- SWR_ASSERT(
- false,
- "Cannot use BASE_NUMA_NODE value: %d, maxNodes: %d, reverting BASE_NUMA_NODE to 0",
- pContext->threadInfo.BASE_NUMA_NODE,
- numNodes);
- pContext->threadInfo.BASE_NUMA_NODE = 0;
- }
-
- if (pContext->threadInfo.MAX_NUMA_NODES)
- {
- numNodes = std::min(numNodes, pContext->threadInfo.MAX_NUMA_NODES);
- }
-
- // Calculate numThreads - at this point everything should be symmetric
- uint32_t numThreads = numNodes * numCoresPerNode * numHyperThreads;
- SWR_REL_ASSERT(numThreads <= numHWThreads);
-
- uint32_t& numAPIReservedThreads = pContext->apiThreadInfo.numAPIReservedThreads;
- uint32_t& numAPIThreadsPerCore = pContext->apiThreadInfo.numAPIThreadsPerCore;
- uint32_t numRemovedThreads = 0;
-
- if (pContext->threadInfo.SINGLE_THREADED)
- {
- numAPIReservedThreads = 0;
- numThreads = 1;
- pContext->NumWorkerThreads = 1;
- pContext->NumFEThreads = 1;
- pContext->NumBEThreads = 1;
- pPool->numThreads = 0;
- }
- else if (pContext->threadInfo.MAX_WORKER_THREADS)
- {
- numThreads = std::min(pContext->threadInfo.MAX_WORKER_THREADS, numHWThreads);
- pContext->threadInfo.BASE_NUMA_NODE = 0;
- pContext->threadInfo.BASE_CORE = 0;
- pContext->threadInfo.BASE_THREAD = 0;
- numAPIReservedThreads = 0;
- }
- else
- {
- if (numAPIReservedThreads >= numThreads)
- {
- numAPIReservedThreads = 0;
- }
- else if (numAPIReservedThreads)
- {
- numAPIThreadsPerCore = std::min(numAPIThreadsPerCore, numHWHyperThreads);
-
- if (0 == numAPIThreadsPerCore)
- {
- numAPIThreadsPerCore = numHWHyperThreads;
- }
-
- numRemovedThreads = numAPIReservedThreads;
- if (numAPIThreadsPerCore == 2 && numHyperThreads == 1)
- {
- // Adjust removed threads to make logic below work
- numRemovedThreads =
- std::max(1U, (numRemovedThreads + numAPIThreadsPerCore - 1) / 2);
- }
-
- numThreads -= numRemovedThreads;
- }
- }
-
- InitPerThreadStats(pContext, numThreads);
-
- if (pContext->threadInfo.SINGLE_THREADED)
- {
- numAPIReservedThreads = 0;
- numThreads = 1;
- }
-
- if (numAPIReservedThreads)
- {
- pPool->pApiThreadData = new (std::nothrow) THREAD_DATA[numAPIReservedThreads];
- SWR_ASSERT(pPool->pApiThreadData);
- if (!pPool->pApiThreadData)
- {
- numAPIReservedThreads = 0;
- }
- else
- {
- memset(pPool->pApiThreadData, 0, sizeof(THREAD_DATA) * numAPIReservedThreads);
- }
- }
- pPool->numReservedThreads = numAPIReservedThreads;
-
- pPool->numThreads = numThreads;
- pContext->NumWorkerThreads = pPool->numThreads;
-
- pPool->pThreadData = new (std::nothrow) THREAD_DATA[pPool->numThreads];
- assert(pPool->pThreadData);
- memset(pPool->pThreadData, 0, sizeof(THREAD_DATA) * pPool->numThreads);
- pPool->numaMask = 0;
-
- // Allocate worker private data
- pPool->pWorkerPrivateDataArray = nullptr;
- if (pContext->workerPrivateState.perWorkerPrivateStateSize == 0)
- {
- pContext->workerPrivateState.perWorkerPrivateStateSize = sizeof(SWR_WORKER_DATA);
- pContext->workerPrivateState.pfnInitWorkerData = nullptr;
- pContext->workerPrivateState.pfnFinishWorkerData = nullptr;
- }
-
- // initialize contents of SWR_WORKER_DATA
- size_t perWorkerSize =
- AlignUpPow2(pContext->workerPrivateState.perWorkerPrivateStateSize, 64);
- size_t totalSize = perWorkerSize * pPool->numThreads;
- if (totalSize)
- {
- pPool->pWorkerPrivateDataArray = AlignedMalloc(totalSize, 64);
- SWR_ASSERT(pPool->pWorkerPrivateDataArray);
-
- void* pWorkerData = pPool->pWorkerPrivateDataArray;
- for (uint32_t i = 0; i < pPool->numThreads; ++i)
- {
- pPool->pThreadData[i].pWorkerPrivateData = pWorkerData;
- if (pContext->workerPrivateState.pfnInitWorkerData)
- {
- pContext->workerPrivateState.pfnInitWorkerData(pContext, pWorkerData, i);
- }
- pWorkerData = PtrAdd(pWorkerData, perWorkerSize);
- }
- }
-
- if (pContext->threadInfo.SINGLE_THREADED)
- {
- return;
- }
-
- pPool->pThreads = new (std::nothrow) THREAD_PTR[pPool->numThreads];
- assert(pPool->pThreads);
-
- if (pContext->threadInfo.MAX_WORKER_THREADS)
- {
- bool bForceBindProcGroup = (numThreads > numThreadsPerProcGroup);
- uint32_t numProcGroups = (numThreads + numThreadsPerProcGroup - 1) / numThreadsPerProcGroup;
- // When MAX_WORKER_THREADS is set we don't bother to bind to specific HW threads
- // But Windows will still require binding to specific process groups
- for (uint32_t workerId = 0; workerId < numThreads; ++workerId)
- {
- pPool->pThreadData[workerId].workerId = workerId;
- pPool->pThreadData[workerId].procGroupId = workerId % numProcGroups;
- pPool->pThreadData[workerId].threadId = 0;
- pPool->pThreadData[workerId].numaId = 0;
- pPool->pThreadData[workerId].coreId = 0;
- pPool->pThreadData[workerId].htId = 0;
- pPool->pThreadData[workerId].pContext = pContext;
- pPool->pThreadData[workerId].forceBindProcGroup = bForceBindProcGroup;
-
- pContext->NumBEThreads++;
- pContext->NumFEThreads++;
- }
- }
- else
- {
- // numa distribution assumes workers on all nodes
- bool useNuma = true;
- if (numCoresPerNode * numHyperThreads == 1)
- {
- useNuma = false;
- }
-
- if (useNuma)
- {
- pPool->numaMask = numNodes - 1; // Only works for 2**n numa nodes (1, 2, 4, etc.)
- }
- else
- {
- pPool->numaMask = 0;
- }
-
- uint32_t workerId = 0;
- uint32_t numReservedThreads = numAPIReservedThreads;
- for (uint32_t n = 0; n < numNodes; ++n)
- {
- if ((n + pContext->threadInfo.BASE_NUMA_NODE) >= nodes.size())
- {
- break;
- }
- auto& node = nodes[n + pContext->threadInfo.BASE_NUMA_NODE];
- uint32_t numCores = numCoresPerNode;
- for (uint32_t c = 0; c < numCores; ++c)
- {
- if ((c + pContext->threadInfo.BASE_CORE) >= node.cores.size())
- {
- break;
- }
-
- auto& core = node.cores[c + pContext->threadInfo.BASE_CORE];
- for (uint32_t t = 0; t < numHyperThreads; ++t)
- {
- if ((t + pContext->threadInfo.BASE_THREAD) >= core.threadIds.size())
- {
- break;
- }
-
- if (numRemovedThreads)
- {
- --numRemovedThreads;
- assert(numReservedThreads);
- --numReservedThreads;
- pPool->pApiThreadData[numReservedThreads].workerId = 0xFFFFFFFFU;
- pPool->pApiThreadData[numReservedThreads].procGroupId = core.procGroup;
- pPool->pApiThreadData[numReservedThreads].threadId = core.threadIds[t];
- pPool->pApiThreadData[numReservedThreads].numaId =
- useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
- pPool->pApiThreadData[numReservedThreads].coreId =
- c + pContext->threadInfo.BASE_CORE;
- pPool->pApiThreadData[numReservedThreads].htId =
- t + pContext->threadInfo.BASE_THREAD;
- pPool->pApiThreadData[numReservedThreads].pContext = pContext;
- pPool->pApiThreadData[numReservedThreads].forceBindProcGroup = false;
-
- if (numAPIThreadsPerCore > numHyperThreads && numReservedThreads)
- {
- --numReservedThreads;
- pPool->pApiThreadData[numReservedThreads].workerId = 0xFFFFFFFFU;
- pPool->pApiThreadData[numReservedThreads].procGroupId = core.procGroup;
- pPool->pApiThreadData[numReservedThreads].threadId =
- core.threadIds[t + 1];
- pPool->pApiThreadData[numReservedThreads].numaId =
- useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
- pPool->pApiThreadData[numReservedThreads].coreId =
- c + pContext->threadInfo.BASE_CORE;
- pPool->pApiThreadData[numReservedThreads].htId =
- t + pContext->threadInfo.BASE_THREAD;
- pPool->pApiThreadData[numReservedThreads].pContext = pContext;
- pPool->pApiThreadData[numReservedThreads].forceBindProcGroup = false;
- }
-
- continue;
- }
-
- SWR_ASSERT(workerId < numThreads);
-
- pPool->pThreadData[workerId].workerId = workerId;
- pPool->pThreadData[workerId].procGroupId = core.procGroup;
- pPool->pThreadData[workerId].threadId =
- core.threadIds[t + pContext->threadInfo.BASE_THREAD];
- pPool->pThreadData[workerId].numaId =
- useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
- pPool->pThreadData[workerId].coreId = c + pContext->threadInfo.BASE_CORE;
- pPool->pThreadData[workerId].htId = t + pContext->threadInfo.BASE_THREAD;
- pPool->pThreadData[workerId].pContext = pContext;
- pPool->pThreadData[workerId].forceBindProcGroup = false;
-
- pContext->NumBEThreads++;
- pContext->NumFEThreads++;
-
- ++workerId;
- }
- }
- }
- SWR_ASSERT(workerId == pContext->NumWorkerThreads);
- }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Spawns one std::thread for every worker slot of the pool.
-///        Nothing is launched when the context is SINGLE_THREADED.
-/// @param pContext - pointer to context
-/// @param pPool - pointer to thread pool object.
-void StartThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
-{
-    if (!pContext->threadInfo.SINGLE_THREADED)
-    {
-        uint32_t slot = 0;
-        while (slot < pContext->NumWorkerThreads)
-        {
-            // Each worker receives a pointer to its own THREAD_DATA record
-            THREAD_DATA* pData     = &pPool->pThreadData[slot];
-            pPool->pThreads[slot] = new std::thread(workerThreadInit<true, true>, pData);
-            ++slot;
-        }
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Tears down the thread pool: idles the context, releases every
-///        worker thread object, invokes the optional per-worker finish
-///        callback and frees the storage owned by the pool.
-/// @param pContext - pointer to context
-/// @param pPool - pointer to thread pool object.
-void DestroyThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
-{
-    // Let outstanding work drain before any thread state is touched
-    SwrWaitForIdle(pContext);
-
-    for (uint32_t worker = 0; worker < pPool->numThreads; ++worker)
-    {
-        if (!pContext->threadInfo.SINGLE_THREADED)
-        {
-            // The thread is detached rather than joined: on Windows, code in some
-            // DLLMain(THREAD_DETACH case) may block the thread until after this returns.
-            THREAD_PTR pThread = pPool->pThreads[worker];
-            pThread->detach();
-            delete pThread;
-        }
-
-        auto pfnFinish = pContext->workerPrivateState.pfnFinishWorkerData;
-        if (pfnFinish)
-        {
-            pfnFinish(pContext, pPool->pThreadData[worker].pWorkerPrivateData, worker);
-        }
-    }
-
-    // Release the bookkeeping arrays and the worker private data block
-    delete[] pPool->pThreads;
-    delete[] pPool->pThreadData;
-    delete[] pPool->pApiThreadData;
-
-    AlignedFree(pPool->pWorkerPrivateDataArray);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.h b/src/gallium/drivers/swr/rasterizer/core/threads.h
deleted file mode 100644
index 3072bbc835d..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/threads.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file threads.h
- *
- * @brief Definitions for SWR threading model.
- *
- ******************************************************************************/
-#pragma once
-
-#include "knobs.h"
-
-#include <unordered_set>
-#include <thread>
-typedef std::thread* THREAD_PTR;
-
-struct SWR_CONTEXT;
-struct DRAW_CONTEXT;
-struct SWR_WORKER_PRIVATE_STATE;
-
-// Per-thread bookkeeping record. CreateThreadPool fills one record per worker
-// (pThreadData) and one per API-reserved thread (pApiThreadData);
-// StartThreadPool passes a pointer to the worker's record to its std::thread.
-struct THREAD_DATA
-{
- void* pWorkerPrivateData; // Pointer to per-worker private data
- uint32_t procGroupId; // Will always be 0 for non-Windows OS
- uint32_t threadId; // within the procGroup for Windows
- uint32_t numaId; // NUMA node id
- uint32_t coreId; // Core id
- uint32_t htId; // Hyperthread id
- // API-reserved thread records carry 0xFFFFFFFF here instead of a worker index
- uint32_t workerId; // index of worker in total thread data
- void* clipperData; // pointer to hang clipper-private data on
- // Owning context, set by CreateThreadPool
- SWR_CONTEXT* pContext;
- bool forceBindProcGroup; // Only useful when MAX_WORKER_THREADS is set.
-};
-
-// State of a context's worker thread pool. Populated by CreateThreadPool and
-// released by DestroyThreadPool.
-struct THREAD_POOL
-{
- // Array of numThreads thread pointers; CreateThreadPool does not allocate it
- // when the context is SINGLE_THREADED
- THREAD_PTR* pThreads;
- // Number of entries in pThreadData (and in pThreads when it is allocated)
- uint32_t numThreads;
- // (number of NUMA nodes - 1) when NUMA distribution is used, otherwise 0;
- // only meaningful for power-of-two node counts
- uint32_t numaMask;
- // One THREAD_DATA record per worker
- THREAD_DATA* pThreadData;
- void* pWorkerPrivateDataArray; // All memory for worker private data
- uint32_t numReservedThreads; // Number of threads reserved for API use
- // One THREAD_DATA record per API-reserved thread; only allocated when
- // numReservedThreads is non-zero
- THREAD_DATA* pApiThreadData;
-};
-
-struct TileSet;
-
-// Thread pool lifecycle: CreateThreadPool sizes the pool and fills the
-// per-thread records, StartThreadPool launches the worker threads and
-// DestroyThreadPool waits for idle and frees everything the pool owns.
-void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool);
-void StartThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool);
-void DestroyThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool);
-
-// Expose FE and BE worker functions to the API thread if single threaded
-void WorkOnFifoFE(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawFE);
-bool WorkOnFifoBE(SWR_CONTEXT* pContext,
- uint32_t workerId,
- uint32_t& curDrawBE,
- TileSet& usedTiles,
- uint32_t numaNode,
- uint32_t numaMask);
-void WorkOnCompute(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawBE);
-int32_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC);
-
-// NOTE(review): presumably binds the calling API thread using the
-// pApiThreadData record at index apiThreadId - confirm against threads.cpp
-void BindApiThread(SWR_CONTEXT* pContext, uint32_t apiThreadId);
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
deleted file mode 100644
index a02fa336277..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
+++ /dev/null
@@ -1,454 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file tilemgr.cpp
- *
- * @brief Implementation for Macro Tile Manager which provides the facilities
- * for threads to work on a macro tile.
- *
- ******************************************************************************/
-#include <unordered_map>
-
-#include "fifo.hpp"
-#include "core/tilemgr.h"
-#include "core/multisample.h"
-#include "rdtsc_core.h"
-
-// Binds the manager to the caching arena that enqueue() passes to each tile
-// queue when clearing it and when enqueuing work.
-MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena) {}
-
-// Queues one piece of backend work on the macro tile at (x, y). The tile's
-// queue is created on first use, and the tile is added to the dirty list
-// when it receives its first work item since its counters were last reset.
-void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK* pWork)
-{
-    // Should not enqueue more than what we have backing for in the hot tile manager.
-    SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
-    SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
-
-    // Drop the work when either coordinate has bits set outside the hot tile range
-    const auto xOutOfRange = (x & ~(KNOB_NUM_HOT_TILES_X - 1));
-    const auto yOutOfRange = (y & ~(KNOB_NUM_HOT_TILES_Y - 1));
-    if (xOutOfRange | yOutOfRange)
-    {
-        return;
-    }
-
-    const uint32_t id = getTileId(x, y);
-
-    // Grow the tile table with headroom so nearby ids do not force another resize
-    if (mTiles.size() <= id)
-    {
-        mTiles.resize((16 + id) * 2);
-    }
-
-    MacroTileQueue* pTile = mTiles[id];
-    if (pTile == nullptr)
-    {
-        pTile      = new MacroTileQueue();
-        mTiles[id] = pTile;
-    }
-
-    pTile->mWorkItemsFE++;
-    pTile->mId = id;
-
-    // First item for this tile: reset its queue and mark the tile dirty
-    const bool firstItem = (pTile->mWorkItemsFE == 1);
-    if (firstItem)
-    {
-        pTile->clear(mArena);
-        mDirtyTiles.push_back(pTile);
-    }
-
-    mWorkItemsProduced++;
-    pTile->enqueue_try_nosync(mArena, pWork);
-}
-
-// Marks every work item queued on tile `id` as consumed: the tile's FE count
-// is added to mWorkItemsConsumed, then both per-tile counters are reset to 0.
-void MacroTileMgr::markTileComplete(uint32_t id)
-{
- SWR_ASSERT(mTiles.size() > id);
- MacroTileQueue& tile = *mTiles[id];
- uint32_t numTiles = tile.mWorkItemsFE;
- // Interlocked add of the tile's item count to the manager-wide consumed total
- InterlockedExchangeAdd(&mWorkItemsConsumed, numTiles);
-
- // Barrier between the consumed-count update and the per-tile counter updates
- _ReadWriteBarrier();
- tile.mWorkItemsBE += numTiles;
- SWR_ASSERT(tile.mWorkItemsFE == tile.mWorkItemsBE);
-
- // clear out tile, but defer fifo clear until the next DC first queues to it.
- // this prevents worker threads from constantly locking a completed macro tile
- tile.mWorkItemsFE = 0;
- tile.mWorkItemsBE = 0;
-}
-
-// Returns the hot tile for one attachment of macro tile `macroID`, making sure
-// it has a buffer large enough for `numSamples` and holds the requested
-// render target array slice.
-//  - No buffer yet: allocates one when `create` is true (state HOTTILE_INVALID),
-//    otherwise returns NULL.
-//  - Existing buffer with fewer samples than requested: frees and reallocates it
-//    (state HOTTILE_INVALID).
-//  - Existing buffer holding a different array slice: applies a pending clear,
-//    stores the tile out if it is dirty, loads the requested slice and leaves the
-//    tile in HOTTILE_RESOLVED.
-HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext,
- DRAW_CONTEXT* pDC,
- HANDLE hWorkerPrivateData,
- uint32_t macroID,
- SWR_RENDERTARGET_ATTACHMENT attachment,
- bool create,
- uint32_t numSamples,
- uint32_t renderTargetArrayIndex)
-{
- uint32_t x, y;
- MacroTileMgr::getTileIndices(macroID, x, y);
-
- SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
- SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
-
- HotTileSet& tile = mHotTiles[x][y];
- HOTTILE& hotTile = tile.Attachment[attachment];
- if (hotTile.pBuffer == NULL)
- {
- if (create)
- {
- uint32_t size = numSamples * mHotTileSize[attachment];
- // Spread tiles over NUMA nodes using the tile coordinates and the pool's mask
- uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
- hotTile.pBuffer =
- (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
- hotTile.state = HOTTILE_INVALID;
- hotTile.numSamples = numSamples;
- hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
- }
- else
- {
- return NULL;
- }
- }
- else
- {
- // free the old tile and create a new one with enough space to hold all samples
- if (numSamples > hotTile.numSamples)
- {
- // tile should be either uninitialized or resolved if we're deleting and switching to a
- // new sample count
- SWR_ASSERT((hotTile.state == HOTTILE_INVALID) || (hotTile.state == HOTTILE_RESOLVED) ||
- (hotTile.state == HOTTILE_CLEAR));
- FreeHotTileMem(hotTile.pBuffer);
-
- uint32_t size = numSamples * mHotTileSize[attachment];
- uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
- hotTile.pBuffer =
- (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
- hotTile.state = HOTTILE_INVALID;
- hotTile.numSamples = numSamples;
- }
-
- // if requested render target array index isn't currently loaded, need to store out the
- // current hottile and load the requested array slice
- if (renderTargetArrayIndex != hotTile.renderTargetArrayIndex)
- {
- // Pick the hot tile format that matches the attachment kind
- SWR_FORMAT format;
- switch (attachment)
- {
- case SWR_ATTACHMENT_COLOR0:
- case SWR_ATTACHMENT_COLOR1:
- case SWR_ATTACHMENT_COLOR2:
- case SWR_ATTACHMENT_COLOR3:
- case SWR_ATTACHMENT_COLOR4:
- case SWR_ATTACHMENT_COLOR5:
- case SWR_ATTACHMENT_COLOR6:
- case SWR_ATTACHMENT_COLOR7:
- format = KNOB_COLOR_HOT_TILE_FORMAT;
- break;
- case SWR_ATTACHMENT_DEPTH:
- format = KNOB_DEPTH_HOT_TILE_FORMAT;
- break;
- case SWR_ATTACHMENT_STENCIL:
- format = KNOB_STENCIL_HOT_TILE_FORMAT;
- break;
- default:
- SWR_INVALID("Unknown attachment: %d", attachment);
- format = KNOB_COLOR_HOT_TILE_FORMAT;
- break;
- }
-
- // A pending clear is applied to the buffer first so that the store below
- // writes the cleared contents
- if (hotTile.state == HOTTILE_CLEAR)
- {
- if (attachment == SWR_ATTACHMENT_STENCIL)
- ClearStencilHotTile(&hotTile);
- else if (attachment == SWR_ATTACHMENT_DEPTH)
- ClearDepthHotTile(&hotTile);
- else
- ClearColorHotTile(&hotTile);
-
- hotTile.state = HOTTILE_DIRTY;
- }
-
- // Store the currently held slice before its buffer is overwritten
- if (hotTile.state == HOTTILE_DIRTY)
- {
- pContext->pfnStoreTile(pDC,
- hWorkerPrivateData,
- format,
- attachment,
- x * KNOB_MACROTILE_X_DIM,
- y * KNOB_MACROTILE_Y_DIM,
- hotTile.renderTargetArrayIndex,
- hotTile.pBuffer);
- }
-
- // Load the requested slice into the same buffer
- pContext->pfnLoadTile(pDC,
- hWorkerPrivateData,
- format,
- attachment,
- x * KNOB_MACROTILE_X_DIM,
- y * KNOB_MACROTILE_Y_DIM,
- renderTargetArrayIndex,
- hotTile.pBuffer);
-
- hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
- hotTile.state = HOTTILE_RESOLVED;
- }
- }
- return &tile.Attachment[attachment];
-}
-
-// Returns the hot tile for one attachment of macro tile `macroID` without
-// storing or loading any surface data.
-// @param pContext   - pointer to context; used to pick the NUMA node of a new buffer
-// @param pDC        - draw context (unused here)
-// @param macroID    - macro tile id, decoded into tile indices
-// @param attachment - render target attachment to look up
-// @param create     - allocate the tile buffer when it does not exist yet
-// @param numSamples - sample count used to size a newly created buffer
-// @return the hot tile, or NULL when it has no buffer and `create` is false
-HOTTILE* HotTileMgr::GetHotTileNoLoad(SWR_CONTEXT* pContext,
-                                      DRAW_CONTEXT* pDC,
-                                      uint32_t      macroID,
-                                      SWR_RENDERTARGET_ATTACHMENT attachment,
-                                      bool          create,
-                                      uint32_t      numSamples)
-{
-    uint32_t x, y;
-    MacroTileMgr::getTileIndices(macroID, x, y);
-
-    SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
-    SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
-
-    HotTileSet& tile    = mHotTiles[x][y];
-    HOTTILE&    hotTile = tile.Attachment[attachment];
-    if (hotTile.pBuffer == NULL)
-    {
-        if (!create)
-        {
-            return NULL;
-        }
-
-        // Allocate through AllocHotTileMem, exactly as GetHotTile does: GetHotTile
-        // releases this same pBuffer with FreeHotTileMem, so both creation paths
-        // must use the matching allocator.
-        uint32_t size     = numSamples * mHotTileSize[attachment];
-        uint32_t numaNode = 0;
-        if (pContext)
-        {
-            // pContext was not dereferenced here before, so a null context is tolerated
-            numaNode = ((x ^ y) & pContext->threadPool.numaMask) +
-                       pContext->threadInfo.BASE_NUMA_NODE;
-        }
-        hotTile.pBuffer                = (uint8_t*)AllocHotTileMem(size, 64, numaNode);
-        hotTile.state                  = HOTTILE_INVALID;
-        hotTile.numSamples             = numSamples;
-        hotTile.renderTargetArrayIndex = 0;
-    }
-
-    return &hotTile;
-}
-
-// Fills the buffer of a color hot tile with the tile's four clear-data floats.
-// Each SIMD16 group of the buffer is written as four consecutive registers,
-// one per clear-data element, in element order 0..3.
-void HotTileMgr::ClearColorHotTile(const HOTTILE* pHotTile)
-{
-    // Broadcast each clear-data float across its own SIMD16 register
-    float*             pClear     = (float*)(pHotTile->clearData);
-    const simd16scalar channel[4] = {_simd16_broadcast_ss(&pClear[0]),
-                                     _simd16_broadcast_ss(&pClear[1]),
-                                     _simd16_broadcast_ss(&pClear[2]),
-                                     _simd16_broadcast_ss(&pClear[3])};
-
-    float*         pDst            = (float*)pHotTile->pBuffer;
-    const uint32_t sampleCount     = pHotTile->numSamples;
-    const uint32_t elemsPerRaster  = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * sampleCount;
-    const uint32_t elemsPerSimd16  = SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM;
-
-    // Visit every raster tile of the macro tile; the buffer is written linearly
-    for (uint32_t tileY = 0; tileY < KNOB_MACROTILE_Y_DIM; tileY += KNOB_TILE_Y_DIM)
-    {
-        for (uint32_t tileX = 0; tileX < KNOB_MACROTILE_X_DIM; tileX += KNOB_TILE_X_DIM)
-        {
-            for (uint32_t elem = 0; elem < elemsPerRaster; elem += elemsPerSimd16)
-            {
-                for (uint32_t c = 0; c < 4; ++c)
-                {
-                    _simd16_store_ps(pDst, channel[c]);
-                    pDst += KNOB_SIMD16_WIDTH;
-                }
-            }
-        }
-    }
-}
-
-// Fills the buffer of a depth hot tile with the first clear-data float.
-void HotTileMgr::ClearDepthHotTile(const HOTTILE* pHotTile)
-{
-    // Broadcast the clear depth across a SIMD16 register
-    float*             pClear = (float*)(pHotTile->clearData);
-    const simd16scalar depth  = _simd16_broadcast_ss(&pClear[0]);
-
-    float*         pDst           = (float*)pHotTile->pBuffer;
-    const uint32_t sampleCount    = pHotTile->numSamples;
-    const uint32_t elemsPerRaster = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * sampleCount;
-    const uint32_t elemsPerSimd16 = SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM;
-
-    // Visit every raster tile of the macro tile; the buffer is written linearly
-    for (uint32_t tileY = 0; tileY < KNOB_MACROTILE_Y_DIM; tileY += KNOB_TILE_Y_DIM)
-    {
-        for (uint32_t tileX = 0; tileX < KNOB_MACROTILE_X_DIM; tileX += KNOB_TILE_X_DIM)
-        {
-            for (uint32_t elem = 0; elem < elemsPerRaster; elem += elemsPerSimd16)
-            {
-                _simd16_store_ps(pDst, depth);
-                pDst += KNOB_SIMD16_WIDTH;
-            }
-        }
-    }
-}
-
-// Fills the buffer of a stencil hot tile with the first clear-data element
-// narrowed to 8 bits.
-void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
-{
-    // Narrow the clear value to a byte and replicate it into every byte lane
-    const uint8_t       stencil = (uint8_t)(pHotTile->clearData[0]);
-    const simd16scalari splat   = _simd16_set1_epi8(stencil);
-
-    simd16scalari* pDst           = (simd16scalari*)pHotTile->pBuffer;
-    const uint32_t sampleCount    = pHotTile->numSamples;
-    const uint32_t elemsPerRaster = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * sampleCount;
-    // Four 8-bit values share each 32-bit slot, so every store covers four times
-    // as many elements as a 32-bit-per-element store would.
-    const uint32_t elemsPerStore = SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM * 4;
-
-    for (uint32_t tileY = 0; tileY < KNOB_MACROTILE_Y_DIM; tileY += KNOB_TILE_Y_DIM)
-    {
-        for (uint32_t tileX = 0; tileX < KNOB_MACROTILE_X_DIM; tileX += KNOB_TILE_X_DIM)
-        {
-            for (uint32_t elem = 0; elem < elemsPerRaster; elem += elemsPerStore)
-            {
-                _simd16_store_si(pDst, splat);
-                pDst += 1;
-            }
-        }
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief InitializeHotTiles
-/// for draw calls, we initialize the active hot tiles and perform deferred
-/// load on them if tile is in invalid state. we do this in the outer thread
-/// loop instead of inside the draw routine itself mainly for performance,
-/// to avoid unnecessary setup every triangle
-/// @todo support deferred clear
-/// @param pContext - pointer to context
-/// @param pDC - draw context whose API state selects the enabled hot tiles
-/// @param workerId - index of the calling worker in the pool's thread data
-/// @param macroID - id of the macro tile to initialize
-void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext,
- DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t macroID)
-{
- const API_STATE& state = GetApiState(pDC);
- HANDLE hWorkerPrivateData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
-
- // Convert the macro tile indices into the coordinates passed to pfnLoadTile
- uint32_t x, y;
- MacroTileMgr::getTileIndices(macroID, x, y);
- x *= KNOB_MACROTILE_X_DIM;
- y *= KNOB_MACROTILE_Y_DIM;
-
- uint32_t numSamples = GetNumSamples(state.rastState.sampleCount);
-
- // check RT if enabled
- // Walk the set bits of the enable mask, one color attachment per bit
- unsigned long rtSlot = 0;
- uint32_t colorHottileEnableMask = state.colorHottileEnable;
- while (_BitScanForward(&rtSlot, colorHottileEnableMask))
- {
- HOTTILE* pHotTile =
- GetHotTile(pContext,
- pDC,
- hWorkerPrivateData,
- macroID,
- (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot),
- true,
- numSamples);
-
- if (pHotTile->state == HOTTILE_INVALID)
- {
- RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
- // invalid hottile before draw requires a load from surface before we can draw to it
- pContext->pfnLoadTile(pDC,
- hWorkerPrivateData,
- KNOB_COLOR_HOT_TILE_FORMAT,
- (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot),
- x,
- y,
- pHotTile->renderTargetArrayIndex,
- pHotTile->pBuffer);
- // NOTE(review): color tiles become RESOLVED after the load, while the depth
- // and stencil paths below mark the tile DIRTY - confirm this is intended
- pHotTile->state = HOTTILE_RESOLVED;
- RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
- }
- else if (pHotTile->state == HOTTILE_CLEAR)
- {
- RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
- // Clear the tile.
- ClearColorHotTile(pHotTile);
- pHotTile->state = HOTTILE_DIRTY;
- RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
- }
- // Drop the handled slot from the mask so the scan moves on to the next set bit
- colorHottileEnableMask &= ~(1 << rtSlot);
- }
-
- // check depth if enabled
- if (state.depthHottileEnable)
- {
- HOTTILE* pHotTile = GetHotTile(
- pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
- if (pHotTile->state == HOTTILE_INVALID)
- {
- RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
- // invalid hottile before draw requires a load from surface before we can draw to it
- pContext->pfnLoadTile(pDC,
- hWorkerPrivateData,
- KNOB_DEPTH_HOT_TILE_FORMAT,
- SWR_ATTACHMENT_DEPTH,
- x,
- y,
- pHotTile->renderTargetArrayIndex,
- pHotTile->pBuffer);
- pHotTile->state = HOTTILE_DIRTY;
- RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
- }
- else if (pHotTile->state == HOTTILE_CLEAR)
- {
- RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
- // Clear the tile.
- ClearDepthHotTile(pHotTile);
- pHotTile->state = HOTTILE_DIRTY;
- RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
- }
- }
-
- // check stencil if enabled
- if (state.stencilHottileEnable)
- {
- HOTTILE* pHotTile = GetHotTile(
- pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
- if (pHotTile->state == HOTTILE_INVALID)
- {
- RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
- // invalid hottile before draw requires a load from surface before we can draw to it
- pContext->pfnLoadTile(pDC,
- hWorkerPrivateData,
- KNOB_STENCIL_HOT_TILE_FORMAT,
- SWR_ATTACHMENT_STENCIL,
- x,
- y,
- pHotTile->renderTargetArrayIndex,
- pHotTile->pBuffer);
- pHotTile->state = HOTTILE_DIRTY;
- RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
- }
- else if (pHotTile->state == HOTTILE_CLEAR)
- {
- RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
- // Clear the tile.
- ClearStencilHotTile(pHotTile);
- pHotTile->state = HOTTILE_DIRTY;
- RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
- }
- }
-}
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
deleted file mode 100644
index fb8a4a14881..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
+++ /dev/null
@@ -1,354 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file tilemgr.h
- *
- * @brief Definitions for Macro Tile Manager which provides the facilities
- * for threads to work on an macro tile.
- *
- ******************************************************************************/
-#pragma once
-
-#include <set>
-#include <unordered_map>
-#include "common/formats.h"
-#include "common/intrin.h"
-#include "fifo.hpp"
-#include "context.h"
-#include "format_traits.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// MacroTile - work queue for a tile.
-//////////////////////////////////////////////////////////////////////////
-struct MacroTileQueue
-{
- MacroTileQueue() {}
- ~MacroTileQueue() { destroy(); }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Returns number of work items queued for this tile.
- uint32_t getNumQueued() { return mFifo.getNumQueued(); }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Attempt to lock the work fifo. If already locked then return false.
- bool tryLock() { return mFifo.tryLock(); }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Clear fifo and unlock it.
- template <typename ArenaT>
- void clear(ArenaT& arena)
- {
- mFifo.clear(arena);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Peek at work sitting at the front of the fifo.
- BE_WORK* peek() { return mFifo.peek(); }
-
- template <typename ArenaT>
- bool enqueue_try_nosync(ArenaT& arena, const BE_WORK* entry)
- {
- return mFifo.enqueue_try_nosync(arena, entry);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Move to next work item
- void dequeue() { mFifo.dequeue_noinc(); }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Destroy fifo
- void destroy() { mFifo.destroy(); }
-
- ///@todo This will all be private.
- uint32_t mWorkItemsFE = 0;
- uint32_t mWorkItemsBE = 0;
- uint32_t mId = 0;
-
-private:
- QUEUE<BE_WORK> mFifo;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// MacroTileMgr - Manages macrotiles for a draw.
-//////////////////////////////////////////////////////////////////////////
-class MacroTileMgr
-{
-public:
- MacroTileMgr(CachingArena& arena);
- ~MacroTileMgr()
- {
- for (auto* pTile : mTiles)
- {
- delete pTile;
- }
- }
-
- INLINE void initialize()
- {
- mWorkItemsProduced = 0;
- mWorkItemsConsumed = 0;
-
- mDirtyTiles.clear();
- }
-
- INLINE std::vector<MacroTileQueue*>& getDirtyTiles() { return mDirtyTiles; }
- void markTileComplete(uint32_t id);
-
- INLINE bool isWorkComplete() { return mWorkItemsProduced == mWorkItemsConsumed; }
-
- void enqueue(uint32_t x, uint32_t y, BE_WORK* pWork);
-
- static INLINE void getTileIndices(uint32_t tileID, uint32_t& x, uint32_t& y)
- {
- // Morton / Z order of tiles
- x = pext_u32(tileID, 0x55555555);
- y = pext_u32(tileID, 0xAAAAAAAA);
- }
-
- static INLINE uint32_t getTileId(uint32_t x, uint32_t y)
- {
- // Morton / Z order of tiles
- return pdep_u32(x, 0x55555555) | pdep_u32(y, 0xAAAAAAAA);
- }
-
-private:
- CachingArena& mArena;
- std::vector<MacroTileQueue*> mTiles;
-
- // Any tile that has work queued to it is a dirty tile.
- std::vector<MacroTileQueue*> mDirtyTiles;
-
- OSALIGNLINE(long) mWorkItemsProduced{0};
- OSALIGNLINE(volatile long) mWorkItemsConsumed{0};
-};
-
-typedef void (*PFN_DISPATCH)(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t threadGroupId,
- void*& pSpillFillBuffer,
- void*& pScratchSpace);
-
-//////////////////////////////////////////////////////////////////////////
-/// DispatchQueue - work queue for dispatch
-//////////////////////////////////////////////////////////////////////////
-class DispatchQueue
-{
-public:
- DispatchQueue() {}
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Setup the producer consumer counts.
- void initialize(uint32_t totalTasks, void* pTaskData, PFN_DISPATCH pfnDispatch)
- {
- // The available and outstanding counts start with total tasks.
- // At the start there are N tasks available and outstanding.
- // When both the available and outstanding counts have reached 0 then all work has
- // completed. When a worker starts on a threadgroup then it decrements the available count.
- // When a worker completes a threadgroup then it decrements the outstanding count.
-
- mTasksAvailable = totalTasks;
- mTasksOutstanding = totalTasks;
-
- mpTaskData = pTaskData;
- mPfnDispatch = pfnDispatch;
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Returns number of tasks available for this dispatch.
- uint32_t getNumQueued() { return (mTasksAvailable > 0) ? mTasksAvailable : 0; }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Atomically decrement the work available count. If the result
- // is greater than 0 then we can on the associated thread group.
- // Otherwise, there is no more work to do.
- bool getWork(uint32_t& groupId)
- {
- long result = InterlockedDecrement(&mTasksAvailable);
-
- if (result >= 0)
- {
- groupId = result;
- return true;
- }
-
- return false;
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Atomically decrement the outstanding count. A worker is notifying
- /// us that he just finished some work. Also, return true if we're
- /// the last worker to complete this dispatch.
- bool finishedWork()
- {
- long result = InterlockedDecrement(&mTasksOutstanding);
- SWR_ASSERT(result >= 0, "Should never oversubscribe work");
-
- return (result == 0) ? true : false;
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Work is complete once both the available/outstanding counts have reached 0.
- bool isWorkComplete() { return ((mTasksAvailable <= 0) && (mTasksOutstanding <= 0)); }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Return pointer to task data.
- const void* GetTasksData() { return mpTaskData; }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Dispatches a unit of work
- void dispatch(DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t threadGroupId,
- void*& pSpillFillBuffer,
- void*& pScratchSpace)
- {
- SWR_ASSERT(mPfnDispatch != nullptr);
- mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
- }
-
- void* mpTaskData{nullptr}; // The API thread will set this up and the callback task function
- // will interpet this.
- PFN_DISPATCH mPfnDispatch{nullptr}; // Function to call per dispatch
-
- OSALIGNLINE(volatile long) mTasksAvailable{0};
- OSALIGNLINE(volatile long) mTasksOutstanding{0};
-};
-
-/// @note this enum needs to be kept in sync with SWR_TILE_STATE!
-enum HOTTILE_STATE
-{
- HOTTILE_INVALID, // tile is in uninitialized state and should be loaded with surface contents
- // before rendering
- HOTTILE_CLEAR, // tile should be cleared
- HOTTILE_DIRTY, // tile has been rendered to
- HOTTILE_RESOLVED, // tile is consistent with memory (either loaded or stored)
-};
-
-struct HOTTILE
-{
- uint8_t* pBuffer;
- HOTTILE_STATE state;
- uint32_t clearData[4]; // May need to change based on pfnClearTile implementation. Reorder for
- // alignment?
- uint32_t numSamples;
- uint32_t renderTargetArrayIndex; // current render target array index loaded
-};
-
-union HotTileSet
-{
- struct
- {
- HOTTILE Color[SWR_NUM_RENDERTARGETS];
- HOTTILE Depth;
- HOTTILE Stencil;
- };
- HOTTILE Attachment[SWR_NUM_ATTACHMENTS];
-};
-
-class HotTileMgr
-{
-public:
- HotTileMgr()
- {
- memset(mHotTiles, 0, sizeof(mHotTiles));
-
- // cache hottile size
- for (uint32_t i = SWR_ATTACHMENT_COLOR0; i <= SWR_ATTACHMENT_COLOR7; ++i)
- {
- mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM *
- FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
- }
- mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM *
- FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
- mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM *
- FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
- }
-
- ~HotTileMgr()
- {
- for (int x = 0; x < KNOB_NUM_HOT_TILES_X; ++x)
- {
- for (int y = 0; y < KNOB_NUM_HOT_TILES_Y; ++y)
- {
- for (int a = 0; a < SWR_NUM_ATTACHMENTS; ++a)
- {
- FreeHotTileMem(mHotTiles[x][y].Attachment[a].pBuffer);
- }
- }
- }
- }
-
- void InitializeHotTiles(SWR_CONTEXT* pContext,
- DRAW_CONTEXT* pDC,
- uint32_t workerId,
- uint32_t macroID);
-
- HOTTILE* GetHotTile(SWR_CONTEXT* pContext,
- DRAW_CONTEXT* pDC,
- HANDLE hWorkerData,
- uint32_t macroID,
- SWR_RENDERTARGET_ATTACHMENT attachment,
- bool create,
- uint32_t numSamples = 1,
- uint32_t renderTargetArrayIndex = 0);
-
- HOTTILE* GetHotTileNoLoad(SWR_CONTEXT* pContext,
- DRAW_CONTEXT* pDC,
- uint32_t macroID,
- SWR_RENDERTARGET_ATTACHMENT attachment,
- bool create,
- uint32_t numSamples = 1);
-
- static void ClearColorHotTile(const HOTTILE* pHotTile);
- static void ClearDepthHotTile(const HOTTILE* pHotTile);
- static void ClearStencilHotTile(const HOTTILE* pHotTile);
-
-private:
- HotTileSet mHotTiles[KNOB_NUM_HOT_TILES_X][KNOB_NUM_HOT_TILES_Y];
- uint32_t mHotTileSize[SWR_NUM_ATTACHMENTS];
-
- void* AllocHotTileMem(size_t size, uint32_t align, uint32_t numaNode)
- {
- void* p = nullptr;
-#if defined(_WIN32)
- HANDLE hProcess = GetCurrentProcess();
- p = VirtualAllocExNuma(
- hProcess, nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE, numaNode);
-#else
- p = AlignedMalloc(size, align);
-#endif
-
- return p;
- }
-
- void FreeHotTileMem(void* pBuffer)
- {
- if (pBuffer)
- {
-#if defined(_WIN32)
- VirtualFree(pBuffer, 0, MEM_RELEASE);
-#else
- AlignedFree(pBuffer);
-#endif
- }
- }
-};
diff --git a/src/gallium/drivers/swr/rasterizer/core/tileset.h b/src/gallium/drivers/swr/rasterizer/core/tileset.h
deleted file mode 100644
index e28c84d789f..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/tileset.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file tileset.h
- *
- * @brief Custom bitset class for managing locked tiles
- *
- ******************************************************************************/
-#pragma once
-
-struct TileSet
-{
- ~TileSet()
- {
- if (m_bits)
- {
- AlignedFree(m_bits);
- }
- }
- INLINE void set(size_t idx)
- {
- _grow(idx);
- size_t& word = _get_word(idx);
- word |= (size_t(1) << (idx & BITS_OFFSET));
- m_maxSet = std::max(m_maxSet, idx + 1);
- }
- INLINE bool get(size_t idx)
- {
- if (idx >= m_size)
- {
- return false;
- }
- size_t word = _get_word(idx);
- return 0 != (word & (size_t(1) << (idx & BITS_OFFSET)));
- }
-
- INLINE void clear()
- {
- if (m_maxSet)
- {
- size_t num_words = (m_maxSet + BITS_OFFSET) / BITS_PER_WORD;
- memset(m_bits, 0, sizeof(size_t) * num_words);
- m_maxSet = 0;
- }
- }
-
-private:
- static const size_t BITS_PER_WORD = sizeof(size_t) * 8;
- static const size_t BITS_OFFSET = BITS_PER_WORD - 1;
-
- size_t m_size = 0;
- size_t m_maxSet = 0;
- size_t* m_bits = nullptr;
-
- INLINE size_t& _get_word(size_t idx) { return m_bits[idx / BITS_PER_WORD]; }
-
- void _grow(size_t idx)
- {
- if (idx < m_size)
- {
- return;
- }
-
- size_t new_size = (1 + idx + BITS_OFFSET) & ~BITS_OFFSET;
- size_t num_words = new_size / BITS_PER_WORD;
- size_t* newBits = (size_t*)AlignedMalloc(sizeof(size_t) * num_words, 64);
- size_t copy_words = 0;
-
- if (m_bits)
- {
- copy_words = (m_size + BITS_OFFSET) / BITS_PER_WORD;
- num_words -= copy_words;
- memcpy(newBits, m_bits, copy_words * sizeof(size_t));
-
- AlignedFree(m_bits);
- }
-
- m_bits = newBits;
- m_size = new_size;
-
- memset(&m_bits[copy_words], 0, sizeof(size_t) * num_words);
- }
-};
diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h b/src/gallium/drivers/swr/rasterizer/core/utils.h
deleted file mode 100644
index 9b483776be9..00000000000
--- a/src/gallium/drivers/swr/rasterizer/core/utils.h
+++ /dev/null
@@ -1,392 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file utils.h
- *
- * @brief Utilities used by SWR core.
- *
- ******************************************************************************/
-#pragma once
-
-#include <string.h>
-#include <type_traits>
-#include <algorithm>
-#include <array>
-#include "common/os.h"
-#include "common/intrin.h"
-#include "common/swr_assert.h"
-#include "core/api.h"
-
-struct simdBBox
-{
- simdscalari ymin;
- simdscalari ymax;
- simdscalari xmin;
- simdscalari xmax;
-};
-
-struct simd16BBox
-{
- simd16scalari ymin;
- simd16scalari ymax;
- simd16scalari xmin;
- simd16scalari xmax;
-};
-
-template <typename SIMD_T>
-struct SIMDBBOX_T
-{
- typename SIMD_T::Integer ymin;
- typename SIMD_T::Integer ymax;
- typename SIMD_T::Integer xmin;
- typename SIMD_T::Integer xmax;
-};
-
-// helper function to unroll loops
-template <int Begin, int End, int Step = 1>
-struct UnrollerL
-{
- template <typename Lambda>
- INLINE static void step(Lambda& func)
- {
- func(Begin);
- UnrollerL<Begin + Step, End, Step>::step(func);
- }
-};
-
-template <int End, int Step>
-struct UnrollerL<End, End, Step>
-{
- template <typename Lambda>
- static void step(Lambda& func)
- {
- }
-};
-
-// helper function to unroll loops, with mask to skip specific iterations
-template <int Begin, int End, int Step = 1, int Mask = 0x7f>
-struct UnrollerLMask
-{
- template <typename Lambda>
- INLINE static void step(Lambda& func)
- {
- if (Mask & (1 << Begin))
- {
- func(Begin);
- }
- UnrollerL<Begin + Step, End, Step>::step(func);
- }
-};
-
-template <int End, int Step, int Mask>
-struct UnrollerLMask<End, End, Step, Mask>
-{
- template <typename Lambda>
- static void step(Lambda& func)
- {
- }
-};
-
-// general CRC compute
-INLINE
-uint32_t ComputeCRC(uint32_t crc, const void* pData, uint32_t size)
-{
-#if defined(_WIN64) || defined(__x86_64__)
- uint32_t sizeInQwords = size / sizeof(uint64_t);
- uint32_t sizeRemainderBytes = size % sizeof(uint64_t);
- uint64_t* pDataWords = (uint64_t*)pData;
- for (uint32_t i = 0; i < sizeInQwords; ++i)
- {
- crc = (uint32_t)_mm_crc32_u64(crc, *pDataWords++);
- }
-#else
- uint32_t sizeInDwords = size / sizeof(uint32_t);
- uint32_t sizeRemainderBytes = size % sizeof(uint32_t);
- uint32_t* pDataWords = (uint32_t*)pData;
- for (uint32_t i = 0; i < sizeInDwords; ++i)
- {
- crc = _mm_crc32_u32(crc, *pDataWords++);
- }
-#endif
-
- uint8_t* pRemainderBytes = (uint8_t*)pDataWords;
- for (uint32_t i = 0; i < sizeRemainderBytes; ++i)
- {
- crc = _mm_crc32_u8(crc, *pRemainderBytes++);
- }
-
- return crc;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// Check specified bit within a data word
-//////////////////////////////////////////////////////////////////////////
-template <typename T>
-INLINE static bool CheckBit(T word, uint32_t bit)
-{
- return 0 != (word & (T(1) << bit));
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// Add byte offset to any-type pointer
-//////////////////////////////////////////////////////////////////////////
-template <typename T>
-INLINE static T* PtrAdd(T* p, intptr_t offset)
-{
- intptr_t intp = reinterpret_cast<intptr_t>(p);
- return reinterpret_cast<T*>(intp + offset);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// Is a power-of-2?
-//////////////////////////////////////////////////////////////////////////
-template <typename T>
-INLINE static bool IsPow2(T value)
-{
- return value == (value & (T(0) - value));
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// Align down to specified alignment
-/// Note: IsPow2(alignment) MUST be true
-//////////////////////////////////////////////////////////////////////////
-template <typename T1, typename T2>
-INLINE static T1 AlignDownPow2(T1 value, T2 alignment)
-{
- SWR_ASSERT(IsPow2(alignment));
- return value & ~T1(alignment - 1);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// Align up to specified alignment
-/// Note: IsPow2(alignment) MUST be true
-//////////////////////////////////////////////////////////////////////////
-template <typename T1, typename T2>
-INLINE static T1 AlignUpPow2(T1 value, T2 alignment)
-{
- return AlignDownPow2(value + T1(alignment - 1), alignment);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// Align up ptr to specified alignment
-/// Note: IsPow2(alignment) MUST be true
-//////////////////////////////////////////////////////////////////////////
-template <typename T1, typename T2>
-INLINE static T1* AlignUpPow2(T1* value, T2 alignment)
-{
- return reinterpret_cast<T1*>(
- AlignDownPow2(reinterpret_cast<uintptr_t>(value) + uintptr_t(alignment - 1), alignment));
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// Align down to specified alignment
-//////////////////////////////////////////////////////////////////////////
-template <typename T1, typename T2>
-INLINE static T1 AlignDown(T1 value, T2 alignment)
-{
- if (IsPow2(alignment))
- {
- return AlignDownPow2(value, alignment);
- }
- return value - T1(value % alignment);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// Align down to specified alignment
-//////////////////////////////////////////////////////////////////////////
-template <typename T1, typename T2>
-INLINE static T1* AlignDown(T1* value, T2 alignment)
-{
- return (T1*)AlignDown(uintptr_t(value), alignment);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// Align up to specified alignment
-/// Note: IsPow2(alignment) MUST be true
-//////////////////////////////////////////////////////////////////////////
-template <typename T1, typename T2>
-INLINE static T1 AlignUp(T1 value, T2 alignment)
-{
- return AlignDown(value + T1(alignment - 1), alignment);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// Align up to specified alignment
-/// Note: IsPow2(alignment) MUST be true
-//////////////////////////////////////////////////////////////////////////
-template <typename T1, typename T2>
-INLINE static T1* AlignUp(T1* value, T2 alignment)
-{
- return AlignDown(PtrAdd(value, alignment - 1), alignment);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// Helper structure used to access an array of elements that don't
-/// correspond to a typical word size.
-//////////////////////////////////////////////////////////////////////////
-template <typename T, size_t BitsPerElementT, size_t ArrayLenT>
-class BitsArray
-{
-private:
- static const size_t BITS_PER_WORD = sizeof(size_t) * 8;
- static const size_t ELEMENTS_PER_WORD = BITS_PER_WORD / BitsPerElementT;
- static const size_t NUM_WORDS = (ArrayLenT + ELEMENTS_PER_WORD - 1) / ELEMENTS_PER_WORD;
- static const size_t ELEMENT_MASK = (size_t(1) << BitsPerElementT) - 1;
-
- static_assert(ELEMENTS_PER_WORD * BitsPerElementT == BITS_PER_WORD,
- "Element size must an integral fraction of pointer size");
-
- size_t m_words[NUM_WORDS] = {};
-
-public:
- T operator[](size_t elementIndex) const
- {
- size_t word = m_words[elementIndex / ELEMENTS_PER_WORD];
- word >>= ((elementIndex % ELEMENTS_PER_WORD) * BitsPerElementT);
- return T(word & ELEMENT_MASK);
- }
-};
-
-// Ranged integer argument for TemplateArgUnroller
-template <typename T, T TMin, T TMax>
-struct RangedArg
-{
- T val;
-};
-
-template <uint32_t TMin, uint32_t TMax>
-using IntArg = RangedArg<uint32_t, TMin, TMax>;
-
-// Recursive template used to auto-nest conditionals. Converts dynamic boolean function
-// arguments to static template arguments.
-template <typename TermT, typename... ArgsB>
-struct TemplateArgUnroller
-{
- //-----------------------------------------
- // Boolean value
- //-----------------------------------------
-
- // Last Arg Terminator
- static typename TermT::FuncType GetFunc(bool bArg)
- {
- if (bArg)
- {
- return TermT::template GetFunc<ArgsB..., std::true_type>();
- }
-
- return TermT::template GetFunc<ArgsB..., std::false_type>();
- }
-
- // Recursively parse args
- template <typename... TArgsT>
- static typename TermT::FuncType GetFunc(bool bArg, TArgsT... remainingArgs)
- {
- if (bArg)
- {
- return TemplateArgUnroller<TermT, ArgsB..., std::true_type>::GetFunc(remainingArgs...);
- }
-
- return TemplateArgUnroller<TermT, ArgsB..., std::false_type>::GetFunc(remainingArgs...);
- }
-
- //-----------------------------------------
- // Ranged value (within specified range)
- //-----------------------------------------
-
- // Last Arg Terminator
- template <typename T, T TMin, T TMax>
- static typename TermT::FuncType GetFunc(RangedArg<T, TMin, TMax> iArg)
- {
- if (iArg.val == TMax)
- {
- return TermT::template GetFunc<ArgsB..., std::integral_constant<T, TMax>>();
- }
- if (TMax > TMin)
- {
- return TemplateArgUnroller<TermT, ArgsB...>::GetFunc(
- RangedArg<T, TMin, (T)(int(TMax) - 1)>{iArg.val});
- }
- SWR_ASSUME(false);
- return nullptr;
- }
- template <typename T, T TVal>
- static typename TermT::FuncType GetFunc(RangedArg<T, TVal, TVal> iArg)
- {
- SWR_ASSERT(iArg.val == TVal);
- return TermT::template GetFunc<ArgsB..., std::integral_constant<T, TVal>>();
- }
-
- // Recursively parse args
- template <typename T, T TMin, T TMax, typename... TArgsT>
- static typename TermT::FuncType GetFunc(RangedArg<T, TMin, TMax> iArg, TArgsT... remainingArgs)
- {
- if (iArg.val == TMax)
- {
- return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<T, TMax>>::GetFunc(
- remainingArgs...);
- }
- if (TMax > TMin)
- {
- return TemplateArgUnroller<TermT, ArgsB...>::GetFunc(
- RangedArg<T, TMin, (T)(int(TMax) - 1)>{iArg.val}, remainingArgs...);
- }
- SWR_ASSUME(false);
- return nullptr;
- }
- template <typename T, T TVal, typename... TArgsT>
- static typename TermT::FuncType GetFunc(RangedArg<T, TVal, TVal> iArg, TArgsT... remainingArgs)
- {
- SWR_ASSERT(iArg.val == TVal);
- return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<T, TVal>>::GetFunc(
- remainingArgs...);
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Helpers used to get / set environment variable
-//////////////////////////////////////////////////////////////////////////
-static INLINE std::string GetEnv(const std::string& variableName)
-{
- std::string output;
-#if defined(_WIN32)
- uint32_t valueSize = GetEnvironmentVariableA(variableName.c_str(), nullptr, 0);
- if (!valueSize)
- return output;
- output.resize(valueSize - 1); // valueSize includes null, output.resize() does not
- GetEnvironmentVariableA(variableName.c_str(), &output[0], valueSize);
-#else
- char* env = getenv(variableName.c_str());
- output = env ? env : "";
-#endif
-
- return output;
-}
-
-static INLINE void SetEnv(const std::string& variableName, const std::string& value)
-{
-#if defined(_WIN32)
- SetEnvironmentVariableA(variableName.c_str(), value.c_str());
-#else
- setenv(variableName.c_str(), value.c_str(), true);
-#endif
-}
-
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
deleted file mode 100644
index 44482939c76..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
+++ /dev/null
@@ -1,853 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file JitManager.cpp
- *
- * @brief Implementation if the Jit Manager.
- *
- * Notes:
- *
- ******************************************************************************/
-#include "jit_pch.hpp"
-
-#include "JitManager.h"
-#include "jit_api.h"
-#include "fetch_jit.h"
-
-#include "core/state.h"
-
-#include "gen_state_llvm.h"
-
-#include <sstream>
-#if defined(_WIN32)
-#include <psapi.h>
-#include <cstring>
-
-#define INTEL_OUTPUT_DIR "c:\\Intel"
-#define SWR_OUTPUT_DIR INTEL_OUTPUT_DIR "\\SWR"
-#define JITTER_OUTPUT_DIR SWR_OUTPUT_DIR "\\Jitter"
-#endif // _WIN32
-
-#if defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
-#include <pwd.h>
-#include <sys/stat.h>
-#endif
-
-
-using namespace llvm;
-using namespace SwrJit;
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Contructor for JitManager.
-/// @param simdWidth - SIMD width to be used in generated program.
-JitManager::JitManager(uint32_t simdWidth, const char* arch, const char* core) :
- mContext(), mBuilder(mContext), mIsModuleFinalized(true), mJitNumber(0), mVWidth(simdWidth),
- mArch(arch)
-{
- mpCurrentModule = nullptr;
- mpExec = nullptr;
-
- InitializeNativeTarget();
- InitializeNativeTargetAsmPrinter();
- InitializeNativeTargetDisassembler();
-
-
- // force JIT to use the same CPU arch as the rest of swr
- if (mArch.AVX512F())
- {
-#if USE_SIMD16_SHADERS
- if (mArch.AVX512ER())
- {
- mHostCpuName = StringRef("knl");
- }
- else
- {
- mHostCpuName = StringRef("skylake-avx512");
- }
- mUsingAVX512 = true;
-#else
- mHostCpuName = StringRef("core-avx2");
-#endif
- if (mVWidth == 0)
- {
- mVWidth = 8;
- }
- }
- else if (mArch.AVX2())
- {
- mHostCpuName = StringRef("core-avx2");
- if (mVWidth == 0)
- {
- mVWidth = 8;
- }
- }
- else if (mArch.AVX())
- {
- if (mArch.F16C())
- {
- mHostCpuName = StringRef("core-avx-i");
- }
- else
- {
- mHostCpuName = StringRef("corei7-avx");
- }
- if (mVWidth == 0)
- {
- mVWidth = 8;
- }
- }
- else
- {
- SWR_INVALID("Jitting requires at least AVX ISA support");
- }
-
-
- mOptLevel = CodeGenOpt::Aggressive;
-
- if (KNOB_JIT_OPTIMIZATION_LEVEL >= CodeGenOpt::None &&
- KNOB_JIT_OPTIMIZATION_LEVEL <= CodeGenOpt::Aggressive)
- {
- mOptLevel = CodeGenOpt::Level(KNOB_JIT_OPTIMIZATION_LEVEL);
- }
-
- if (KNOB_JIT_ENABLE_CACHE)
- {
- mCache.Init(this, mHostCpuName, mOptLevel);
- }
-
- SetupNewModule();
- mIsModuleFinalized = true;
-
- // fetch function signature
-#if USE_SIMD16_SHADERS
- // typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
-#else
- // typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
-#endif
- std::vector<Type*> fsArgs;
-
- // llvm5 is picky and does not take a void * type
- fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
-
- fsArgs.push_back(Type::getInt8PtrTy(mContext));
-
- fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
-#if USE_SIMD16_SHADERS
- fsArgs.push_back(PointerType::get(Gen_simd16vertex(this), 0));
-#else
- fsArgs.push_back(PointerType::get(Gen_simdvertex(this), 0));
-#endif
-
- mFetchShaderTy = FunctionType::get(Type::getVoidTy(mContext), fsArgs, false);
-
-#if defined(_MSC_VER)
- // explicitly instantiate used symbols from potentially staticly linked libs
- sys::DynamicLibrary::AddSymbol("exp2f", &exp2f);
- sys::DynamicLibrary::AddSymbol("log2f", &log2f);
- sys::DynamicLibrary::AddSymbol("sinf", &sinf);
- sys::DynamicLibrary::AddSymbol("cosf", &cosf);
- sys::DynamicLibrary::AddSymbol("powf", &powf);
-#endif
-
-#if defined(_WIN32)
- if (KNOB_DUMP_SHADER_IR)
- {
- CreateDirectoryPath(INTEL_OUTPUT_DIR);
- CreateDirectoryPath(SWR_OUTPUT_DIR);
- CreateDirectoryPath(JITTER_OUTPUT_DIR);
- }
-#endif
-}
-
-void JitManager::CreateExecEngine(std::unique_ptr<Module> pModule)
-{
- TargetOptions tOpts;
- tOpts.AllowFPOpFusion = FPOpFusion::Fast;
- tOpts.NoInfsFPMath = false;
- tOpts.NoNaNsFPMath = false;
- tOpts.UnsafeFPMath = false;
-
- // tOpts.PrintMachineCode = true;
-
- mpExec = EngineBuilder(std::move(pModule))
- .setTargetOptions(tOpts)
- .setOptLevel(mOptLevel)
- .setMCPU(mHostCpuName)
- .create();
-
- if (KNOB_JIT_ENABLE_CACHE)
- {
- mpExec->setObjectCache(&mCache);
- }
-
-#if LLVM_USE_INTEL_JITEVENTS
- JITEventListener* vTune = JITEventListener::createIntelJITEventListener();
- mpExec->RegisterJITEventListener(vTune);
-#endif
-
- mvExecEngines.push_back(mpExec);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Create new LLVM module.
-void JitManager::SetupNewModule()
-{
- SWR_ASSERT(mIsModuleFinalized == true && "Current module is not finalized!");
-
- std::unique_ptr<Module> newModule(new Module("", mContext));
- mpCurrentModule = newModule.get();
- mpCurrentModule->setTargetTriple(sys::getProcessTriple());
- CreateExecEngine(std::move(newModule));
- mIsModuleFinalized = false;
-}
-
-
-DIType*
-JitManager::CreateDebugStructType(StructType* pType,
- const std::string& name,
- DIFile* pFile,
- uint32_t lineNum,
- const std::vector<std::pair<std::string, uint32_t>>& members)
-{
- DIBuilder builder(*mpCurrentModule);
- SmallVector<Metadata*, 8> ElemTypes;
- DataLayout DL = DataLayout(mpCurrentModule);
- uint32_t size = DL.getTypeAllocSizeInBits(pType);
- uint32_t alignment = DL.getABITypeAlignment(pType);
- DINode::DIFlags flags = DINode::DIFlags::FlagPublic;
-
- DICompositeType* pDIStructTy = builder.createStructType(pFile,
- name,
- pFile,
- lineNum,
- size,
- alignment,
- flags,
- nullptr,
- builder.getOrCreateArray(ElemTypes));
-
- // Register mapping now to break loops (in case struct contains itself or pointers to itself)
- mDebugStructMap[pType] = pDIStructTy;
-
- uint32_t idx = 0;
- for (auto& elem : pType->elements())
- {
- std::string name = members[idx].first;
- uint32_t lineNum = members[idx].second;
- size = DL.getTypeAllocSizeInBits(elem);
- alignment = DL.getABITypeAlignment(elem);
- uint32_t offset = DL.getStructLayout(pType)->getElementOffsetInBits(idx);
- llvm::DIType* pDebugTy = GetDebugType(elem);
- ElemTypes.push_back(builder.createMemberType(
- pDIStructTy, name, pFile, lineNum, size, alignment, offset, flags, pDebugTy));
-
- idx++;
- }
-
- pDIStructTy->replaceElements(builder.getOrCreateArray(ElemTypes));
- return pDIStructTy;
-}
-
-DIType* JitManager::GetDebugArrayType(Type* pTy)
-{
- DIBuilder builder(*mpCurrentModule);
- DataLayout DL = DataLayout(mpCurrentModule);
- ArrayType* pArrayTy = cast<ArrayType>(pTy);
- uint32_t size = DL.getTypeAllocSizeInBits(pArrayTy);
- uint32_t alignment = DL.getABITypeAlignment(pArrayTy);
-
- SmallVector<Metadata*, 8> Elems;
- Elems.push_back(builder.getOrCreateSubrange(0, pArrayTy->getNumElements()));
- return builder.createArrayType(
- size, alignment, GetDebugType(pArrayTy->getElementType()), builder.getOrCreateArray(Elems));
-}
-
-// Create a DIType from llvm Type
-DIType* JitManager::GetDebugType(Type* pTy)
-{
- DIBuilder builder(*mpCurrentModule);
- Type::TypeID id = pTy->getTypeID();
-
- switch (id)
- {
- case Type::VoidTyID:
- return builder.createUnspecifiedType("void");
- break;
- case Type::HalfTyID:
- return builder.createBasicType("float16", 16, dwarf::DW_ATE_float);
- break;
- case Type::FloatTyID:
- return builder.createBasicType("float", 32, dwarf::DW_ATE_float);
- break;
- case Type::DoubleTyID:
- return builder.createBasicType("double", 64, dwarf::DW_ATE_float);
- break;
- case Type::IntegerTyID:
- return GetDebugIntegerType(pTy);
- break;
- case Type::StructTyID:
- return GetDebugStructType(pTy);
- break;
- case Type::ArrayTyID:
- return GetDebugArrayType(pTy);
- break;
- case Type::PointerTyID:
- return builder.createPointerType(GetDebugType(pTy->getPointerElementType()), 64, 64);
- break;
-#if LLVM_VERSION_MAJOR >= 11
- case Type::FixedVectorTyID:
-#else
- case Type::VectorTyID:
-#endif
- return GetDebugVectorType(pTy);
- break;
- case Type::FunctionTyID:
- return GetDebugFunctionType(pTy);
- break;
- default:
- SWR_ASSERT(false, "Unimplemented llvm type");
- }
- return nullptr;
-}
-
-// Create a DISubroutineType from an llvm FunctionType
-DIType* JitManager::GetDebugFunctionType(Type* pTy)
-{
- SmallVector<Metadata*, 8> ElemTypes;
- FunctionType* pFuncTy = cast<FunctionType>(pTy);
- DIBuilder builder(*mpCurrentModule);
-
- // Add result type
- ElemTypes.push_back(GetDebugType(pFuncTy->getReturnType()));
-
- // Add arguments
- for (auto& param : pFuncTy->params())
- {
- ElemTypes.push_back(GetDebugType(param));
- }
-
- return builder.createSubroutineType(builder.getOrCreateTypeArray(ElemTypes));
-}
-
-DIType* JitManager::GetDebugIntegerType(Type* pTy)
-{
- DIBuilder builder(*mpCurrentModule);
- IntegerType* pIntTy = cast<IntegerType>(pTy);
- switch (pIntTy->getBitWidth())
- {
- case 1:
- return builder.createBasicType("int1", 1, dwarf::DW_ATE_unsigned);
- break;
- case 8:
- return builder.createBasicType("int8", 8, dwarf::DW_ATE_signed);
- break;
- case 16:
- return builder.createBasicType("int16", 16, dwarf::DW_ATE_signed);
- break;
- case 32:
- return builder.createBasicType("int", 32, dwarf::DW_ATE_signed);
- break;
- case 64:
- return builder.createBasicType("int64", 64, dwarf::DW_ATE_signed);
- break;
- case 128:
- return builder.createBasicType("int128", 128, dwarf::DW_ATE_signed);
- break;
- default:
- SWR_ASSERT(false, "Unimplemented integer bit width");
- }
- return nullptr;
-}
-
-DIType* JitManager::GetDebugVectorType(Type* pTy)
-{
- DIBuilder builder(*mpCurrentModule);
-#if LLVM_VERSION_MAJOR >= 12
- FixedVectorType* pVecTy = cast<FixedVectorType>(pTy);
-#elif LLVM_VERSION_MAJOR >= 11
- VectorType* pVecTy = cast<VectorType>(pTy);
-#else
- auto pVecTy = pTy;
-#endif
- DataLayout DL = DataLayout(mpCurrentModule);
- uint32_t size = DL.getTypeAllocSizeInBits(pVecTy);
- uint32_t alignment = DL.getABITypeAlignment(pVecTy);
- SmallVector<Metadata*, 1> Elems;
-
-#if LLVM_VERSION_MAJOR >= 11
- Elems.push_back(builder.getOrCreateSubrange(0, pVecTy->getNumElements()));
-#else
- Elems.push_back(builder.getOrCreateSubrange(0, pVecTy->getVectorNumElements()));
-#endif
-
- return builder.createVectorType(size,
- alignment,
-#if LLVM_VERSION_MAJOR >= 11
- GetDebugType(pVecTy->getElementType()),
-#else
- GetDebugType(pVecTy->getVectorElementType()),
-#endif
- builder.getOrCreateArray(Elems));
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Dump function x86 assembly to file.
-/// @note This should only be called after the module has been jitted to x86 and the
-/// module will not be further accessed.
-void JitManager::DumpAsm(Function* pFunction, const char* fileName)
-{
- if (KNOB_DUMP_SHADER_IR)
- {
-#if defined(_WIN32)
- DWORD pid = GetCurrentProcessId();
- char procname[MAX_PATH];
- GetModuleFileNameA(NULL, procname, MAX_PATH);
- const char* pBaseName = strrchr(procname, '\\');
- std::stringstream outDir;
- outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
- CreateDirectoryPath(outDir.str().c_str());
-#endif
-
- std::error_code EC;
- Module* pModule = pFunction->getParent();
- const char* funcName = pFunction->getName().data();
- char fName[256];
-#if defined(_WIN32)
- sprintf(fName, "%s\\%s.%s.asm", outDir.str().c_str(), funcName, fileName);
-#else
- sprintf(fName, "%s.%s.asm", funcName, fileName);
-#endif
-
- raw_fd_ostream filestream(fName, EC, llvm::sys::fs::F_None);
-
- legacy::PassManager* pMPasses = new legacy::PassManager();
- auto* pTarget = mpExec->getTargetMachine();
- pTarget->Options.MCOptions.AsmVerbose = true;
-#if LLVM_VERSION_MAJOR >= 10
- pTarget->addPassesToEmitFile(
- *pMPasses, filestream, nullptr, CGFT_AssemblyFile);
-#elif LLVM_VERSION_MAJOR >= 7
- pTarget->addPassesToEmitFile(
- *pMPasses, filestream, nullptr, TargetMachine::CGFT_AssemblyFile);
-#else
- pTarget->addPassesToEmitFile(*pMPasses, filestream, TargetMachine::CGFT_AssemblyFile);
-#endif
- pMPasses->run(*pModule);
- delete pMPasses;
- pTarget->Options.MCOptions.AsmVerbose = false;
- }
-}
-
-std::string JitManager::GetOutputDir()
-{
-#if defined(_WIN32)
- DWORD pid = GetCurrentProcessId();
- char procname[MAX_PATH];
- GetModuleFileNameA(NULL, procname, MAX_PATH);
- const char* pBaseName = strrchr(procname, '\\');
- std::stringstream outDir;
- outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid;
- CreateDirectoryPath(outDir.str().c_str());
- return outDir.str();
-#endif
- return "";
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Dump function to file.
-void JitManager::DumpToFile(Module* M,
- const char* fileName,
- llvm::AssemblyAnnotationWriter* annotater)
-{
- if (KNOB_DUMP_SHADER_IR)
- {
- std::string outDir = GetOutputDir();
-
- std::error_code EC;
- const char* funcName = M->getName().data();
- char fName[256];
-#if defined(_WIN32)
- sprintf(fName, "%s\\%s.%s.ll", outDir.c_str(), funcName, fileName);
-#else
- sprintf(fName, "%s.%s.ll", funcName, fileName);
-#endif
- raw_fd_ostream fd(fName, EC, llvm::sys::fs::F_None);
- M->print(fd, annotater);
- fd.flush();
- }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Dump function to file.
-void JitManager::DumpToFile(Function* f, const char* fileName)
-{
- if (KNOB_DUMP_SHADER_IR)
- {
- std::string outDir = GetOutputDir();
-
- std::error_code EC;
- const char* funcName = f->getName().data();
- char fName[256];
-#if defined(_WIN32)
- sprintf(fName, "%s\\%s.%s.ll", outDir.c_str(), funcName, fileName);
-#else
- sprintf(fName, "%s.%s.ll", funcName, fileName);
-#endif
- raw_fd_ostream fd(fName, EC, llvm::sys::fs::F_None);
- f->print(fd, nullptr);
-
-#if defined(_WIN32)
- sprintf(fName, "%s\\cfg.%s.%s.dot", outDir.c_str(), funcName, fileName);
-#else
- sprintf(fName, "cfg.%s.%s.dot", funcName, fileName);
-#endif
- fd.flush();
-
- raw_fd_ostream fd_cfg(fName, EC, llvm::sys::fs::F_Text);
- WriteGraph(fd_cfg, (const Function*)f);
-
- fd_cfg.flush();
- }
-}
-
-extern "C" {
-bool g_DllActive = true;
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Create JIT context.
-/// @param simdWidth - SIMD width to be used in generated program.
-HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char* arch, const char* core)
-{
- return new JitManager(targetSimdWidth, arch, core);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Destroy JIT context.
-void JITCALL JitDestroyContext(HANDLE hJitContext)
-{
- if (g_DllActive)
- {
- delete reinterpret_cast<JitManager*>(hJitContext);
- }
-}
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// JitCache
-//////////////////////////////////////////////////////////////////////////
-
-//////////////////////////////////////////////////////////////////////////
-/// JitCacheFileHeader
-//////////////////////////////////////////////////////////////////////////
-struct JitCacheFileHeader
-{
- void Init(uint32_t llCRC,
- uint32_t objCRC,
- const std::string& moduleID,
- const std::string& cpu,
- uint32_t optLevel,
- uint64_t objSize)
- {
- m_objSize = objSize;
- m_llCRC = llCRC;
- m_objCRC = objCRC;
- strncpy(m_ModuleID, moduleID.c_str(), JC_STR_MAX_LEN - 1);
- m_ModuleID[JC_STR_MAX_LEN - 1] = 0;
- strncpy(m_Cpu, cpu.c_str(), JC_STR_MAX_LEN - 1);
- m_Cpu[JC_STR_MAX_LEN - 1] = 0;
- m_optLevel = optLevel;
- }
-
-
- bool
- IsValid(uint32_t llCRC, const std::string& moduleID, const std::string& cpu, uint32_t optLevel)
- {
- if ((m_MagicNumber != JC_MAGIC_NUMBER) || (m_llCRC != llCRC) ||
- (m_platformKey != JC_PLATFORM_KEY) || (m_optLevel != optLevel))
- {
- return false;
- }
-
- m_ModuleID[JC_STR_MAX_LEN - 1] = 0;
- if (strncmp(moduleID.c_str(), m_ModuleID, JC_STR_MAX_LEN - 1))
- {
- return false;
- }
-
- m_Cpu[JC_STR_MAX_LEN - 1] = 0;
- if (strncmp(cpu.c_str(), m_Cpu, JC_STR_MAX_LEN - 1))
- {
- return false;
- }
-
- return true;
- }
-
- uint64_t GetObjectSize() const { return m_objSize; }
- uint64_t GetObjectCRC() const { return m_objCRC; }
-
-private:
- static const uint64_t JC_MAGIC_NUMBER = 0xfedcba9876543210ULL + 7;
- static const size_t JC_STR_MAX_LEN = 32;
- static const uint32_t JC_PLATFORM_KEY = (LLVM_VERSION_MAJOR << 24) |
- (LLVM_VERSION_MINOR << 16) | (LLVM_VERSION_PATCH << 8) |
- ((sizeof(void*) > sizeof(uint32_t)) ? 1 : 0);
-
- uint64_t m_MagicNumber = JC_MAGIC_NUMBER;
- uint64_t m_objSize = 0;
- uint32_t m_llCRC = 0;
- uint32_t m_platformKey = JC_PLATFORM_KEY;
- uint32_t m_objCRC = 0;
- uint32_t m_optLevel = 0;
- char m_ModuleID[JC_STR_MAX_LEN] = {};
- char m_Cpu[JC_STR_MAX_LEN] = {};
-};
-
-static inline uint32_t ComputeModuleCRC(const llvm::Module* M)
-{
- std::string bitcodeBuffer;
- raw_string_ostream bitcodeStream(bitcodeBuffer);
-
-#if LLVM_VERSION_MAJOR >= 7
- llvm::WriteBitcodeToFile(*M, bitcodeStream);
-#else
- llvm::WriteBitcodeToFile(M, bitcodeStream);
-#endif
- // M->print(bitcodeStream, nullptr, false);
-
- bitcodeStream.flush();
-
- return ComputeCRC(0, bitcodeBuffer.data(), bitcodeBuffer.size());
-}
-
-/// constructor
-JitCache::JitCache()
-{
-#if defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
- if (strncmp(KNOB_JIT_CACHE_DIR.c_str(), "~/", 2) == 0)
- {
- char* homedir;
- if (!(homedir = getenv("HOME")))
- {
- homedir = getpwuid(getuid())->pw_dir;
- }
- mCacheDir = homedir;
- mCacheDir += (KNOB_JIT_CACHE_DIR.c_str() + 1);
- }
- else
-#endif
- {
- mCacheDir = KNOB_JIT_CACHE_DIR;
- }
-
- // Create cache dir at startup to allow jitter to write debug.ll files
- // to that directory.
- if (!llvm::sys::fs::exists(mCacheDir.str()) &&
- llvm::sys::fs::create_directories(mCacheDir.str()))
- {
- SWR_INVALID("Unable to create directory: %s", mCacheDir.c_str());
- }
-
-}
-
-int ExecUnhookedProcess(const std::string& CmdLine, std::string* pStdOut, std::string* pStdErr)
-{
-
- return ExecCmd(CmdLine, nullptr, pStdOut, pStdErr);
-}
-
-/// Calculate actual directory where module will be cached.
-/// This is always a subdirectory of mCacheDir. Full absolute
-/// path name will be stored in mCurrentModuleCacheDir
-void JitCache::CalcModuleCacheDir()
-{
- mModuleCacheDir.clear();
-
- llvm::SmallString<MAX_PATH> moduleDir = mCacheDir;
-
- // Create 4 levels of directory hierarchy based on CRC, 256 entries each
- uint8_t* pCRC = (uint8_t*)&mCurrentModuleCRC;
- for (uint32_t i = 0; i < 4; ++i)
- {
- llvm::sys::path::append(moduleDir, std::to_string((int)pCRC[i]));
- }
-
- mModuleCacheDir = moduleDir;
-}
-
-/// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
-void JitCache::notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj)
-{
- const std::string& moduleID = M->getModuleIdentifier();
- if (!moduleID.length())
- {
- return;
- }
-
- if (!mModuleCacheDir.size())
- {
- SWR_INVALID("Unset module cache directory");
- return;
- }
-
- if (!llvm::sys::fs::exists(mModuleCacheDir.str()) &&
- llvm::sys::fs::create_directories(mModuleCacheDir.str()))
- {
- SWR_INVALID("Unable to create directory: %s", mModuleCacheDir.c_str());
- return;
- }
-
- JitCacheFileHeader header;
-
- llvm::SmallString<MAX_PATH> filePath = mModuleCacheDir;
- llvm::sys::path::append(filePath, moduleID);
-
- llvm::SmallString<MAX_PATH> objPath = filePath;
- objPath += JIT_OBJ_EXT;
-
- {
- std::error_code err;
- llvm::raw_fd_ostream fileObj(objPath.c_str(), err, llvm::sys::fs::F_None);
- fileObj << Obj.getBuffer();
- fileObj.flush();
- }
-
-
- {
- std::error_code err;
- llvm::raw_fd_ostream fileObj(filePath.c_str(), err, llvm::sys::fs::F_None);
-
- uint32_t objcrc = ComputeCRC(0, Obj.getBufferStart(), Obj.getBufferSize());
-
- header.Init(mCurrentModuleCRC, objcrc, moduleID, mCpu, mOptLevel, Obj.getBufferSize());
-
- fileObj.write((const char*)&header, sizeof(header));
- fileObj.flush();
- }
-}
-
-/// Returns a pointer to a newly allocated MemoryBuffer that contains the
-/// object which corresponds with Module M, or 0 if an object is not
-/// available.
-std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module* M)
-{
- const std::string& moduleID = M->getModuleIdentifier();
- mCurrentModuleCRC = ComputeModuleCRC(M);
-
- if (!moduleID.length())
- {
- return nullptr;
- }
-
- CalcModuleCacheDir();
-
- if (!llvm::sys::fs::exists(mModuleCacheDir))
- {
- return nullptr;
- }
-
- llvm::SmallString<MAX_PATH> filePath = mModuleCacheDir;
- llvm::sys::path::append(filePath, moduleID);
-
- llvm::SmallString<MAX_PATH> objFilePath = filePath;
- objFilePath += JIT_OBJ_EXT;
-
- FILE* fpObjIn = nullptr;
- FILE* fpIn = fopen(filePath.c_str(), "rb");
- if (!fpIn)
- {
- return nullptr;
- }
-
- std::unique_ptr<llvm::MemoryBuffer> pBuf = nullptr;
- do
- {
- JitCacheFileHeader header;
- if (!fread(&header, sizeof(header), 1, fpIn))
- {
- break;
- }
-
- if (!header.IsValid(mCurrentModuleCRC, moduleID, mCpu, mOptLevel))
- {
- break;
- }
-
- fpObjIn = fopen(objFilePath.c_str(), "rb");
- if (!fpObjIn)
- {
- break;
- }
-
-#if LLVM_VERSION_MAJOR < 6
- pBuf = llvm::MemoryBuffer::getNewUninitMemBuffer(size_t(header.GetObjectSize()));
-#else
- pBuf = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(size_t(header.GetObjectSize()));
-#endif
- if (!fread(const_cast<char*>(pBuf->getBufferStart()), header.GetObjectSize(), 1, fpObjIn))
- {
- pBuf = nullptr;
- break;
- }
-
- if (header.GetObjectCRC() != ComputeCRC(0, pBuf->getBufferStart(), pBuf->getBufferSize()))
- {
- SWR_TRACE("Invalid object cache file, ignoring: %s", filePath.c_str());
- pBuf = nullptr;
- break;
- }
-
- } while (0);
-
- fclose(fpIn);
-
- if (fpObjIn)
- {
- fclose(fpObjIn);
- }
-
-
- return pBuf;
-}
-
-void InterleaveAssemblyAnnotater::emitInstructionAnnot(const llvm::Instruction* pInst,
- llvm::formatted_raw_ostream& OS)
-{
- auto dbgLoc = pInst->getDebugLoc();
- if (dbgLoc)
- {
- unsigned int line = dbgLoc.getLine();
- if (line != mCurrentLineNo)
- {
- if (line > 0 && line <= mAssembly.size())
- {
- // HACK: here we assume that OS is a formatted_raw_ostream(ods())
- // and modify the color accordingly. We can't do the color
- // modification on OS because formatted_raw_ostream strips
- // the color information. The only way to fix this behavior
- // is to patch LLVM.
- OS << "\n; " << line << ": " << mAssembly[line - 1] << "\n";
- }
- mCurrentLineNo = line;
- }
- }
-}
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
deleted file mode 100644
index d96d22e1b95..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file JitManager.h
- *
- * @brief JitManager contains the LLVM data structures used for JIT generation
- *
- * Notes:
- *
- ******************************************************************************/
-#pragma once
-
-#include "jit_pch.hpp"
-#include "common/isa.hpp"
-#include <llvm/IR/AssemblyAnnotationWriter.h>
-
-
-//////////////////////////////////////////////////////////////////////////
-/// JitInstructionSet
-/// @brief Subclass of InstructionSet that allows users to override
-/// the reporting of support for certain ISA features. This allows capping
-/// the jitted code to a certain feature level, e.g. jit AVX level code on
-/// a platform that supports AVX2.
-//////////////////////////////////////////////////////////////////////////
-class JitInstructionSet : public InstructionSet
-{
-public:
- JitInstructionSet(const char* requestedIsa) : isaRequest(requestedIsa)
- {
- std::transform(isaRequest.begin(), isaRequest.end(), isaRequest.begin(), ::tolower);
-
- if (isaRequest == "avx")
- {
- bForceAVX = true;
- bForceAVX2 = false;
- bForceAVX512 = false;
- }
- else if (isaRequest == "avx2")
- {
- bForceAVX = false;
- bForceAVX2 = true;
- bForceAVX512 = false;
- }
- else if (isaRequest == "avx512")
- {
- bForceAVX = false;
- bForceAVX2 = false;
- bForceAVX512 = true;
- }
- };
-
- bool AVX2(void) { return bForceAVX ? 0 : InstructionSet::AVX2(); }
- bool AVX512F(void) { return (bForceAVX | bForceAVX2) ? 0 : InstructionSet::AVX512F(); }
- bool AVX512ER(void) { return (bForceAVX | bForceAVX2) ? 0 : InstructionSet::AVX512ER(); }
- bool BMI2(void) { return bForceAVX ? 0 : InstructionSet::BMI2(); }
-
-private:
- bool bForceAVX = false;
- bool bForceAVX2 = false;
- bool bForceAVX512 = false;
- std::string isaRequest;
-};
-
-struct JitLLVMContext : llvm::LLVMContext
-{
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// JitCache
-//////////////////////////////////////////////////////////////////////////
-struct JitManager; // Forward Decl
-class JitCache : public llvm::ObjectCache
-{
-public:
- /// constructor
- JitCache();
- virtual ~JitCache() {}
-
- void Init(JitManager* pJitMgr, const llvm::StringRef& cpu, llvm::CodeGenOpt::Level level)
- {
- mCpu = cpu.str();
- mpJitMgr = pJitMgr;
- mOptLevel = level;
- }
-
- /// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
- void notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj) override;
-
- /// Returns a pointer to a newly allocated MemoryBuffer that contains the
- /// object which corresponds with Module M, or 0 if an object is not
- /// available.
- std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module* M) override;
-
- const char* GetModuleCacheDir() { return mModuleCacheDir.c_str(); }
-
-private:
- std::string mCpu;
- llvm::SmallString<MAX_PATH> mCacheDir;
- llvm::SmallString<MAX_PATH> mModuleCacheDir;
- uint32_t mCurrentModuleCRC = 0;
- JitManager* mpJitMgr = nullptr;
- llvm::CodeGenOpt::Level mOptLevel = llvm::CodeGenOpt::None;
-
- /// Calculate actual directory where module will be cached.
- /// This is always a subdirectory of mCacheDir. Full absolute
- /// path name will be stored in mCurrentModuleCacheDir
- void CalcModuleCacheDir();
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// JitManager
-//////////////////////////////////////////////////////////////////////////
-struct JitManager
-{
- JitManager(uint32_t w, const char* arch, const char* core);
- ~JitManager()
- {
- for (auto* pExec : mvExecEngines)
- {
- delete pExec;
- }
- }
-
- JitLLVMContext mContext; ///< LLVM compiler
- llvm::IRBuilder<> mBuilder; ///< LLVM IR Builder
- llvm::ExecutionEngine* mpExec;
- std::vector<llvm::ExecutionEngine*> mvExecEngines;
- JitCache mCache;
- llvm::StringRef mHostCpuName;
- llvm::CodeGenOpt::Level mOptLevel;
-
- // Need to be rebuilt after a JIT and before building new IR
- llvm::Module* mpCurrentModule;
- bool mIsModuleFinalized;
- uint32_t mJitNumber;
-
- uint32_t mVWidth;
-
- bool mUsingAVX512 = false;
-
- // fetch shader types
- llvm::FunctionType* mFetchShaderTy;
-
- JitInstructionSet mArch;
-
- // Debugging support
- std::unordered_map<llvm::StructType*, llvm::DIType*> mDebugStructMap;
-
- void CreateExecEngine(std::unique_ptr<llvm::Module> M);
- void SetupNewModule();
-
- void DumpAsm(llvm::Function* pFunction, const char* fileName);
- static void DumpToFile(llvm::Function* f, const char* fileName);
- static void DumpToFile(llvm::Module* M,
- const char* fileName,
- llvm::AssemblyAnnotationWriter* annotater = nullptr);
- static std::string GetOutputDir();
-
- // Debugging support methods
- llvm::DIType* GetDebugType(llvm::Type* pTy);
- llvm::DIType* GetDebugIntegerType(llvm::Type* pTy);
- llvm::DIType* GetDebugArrayType(llvm::Type* pTy);
- llvm::DIType* GetDebugVectorType(llvm::Type* pTy);
- llvm::DIType* GetDebugFunctionType(llvm::Type* pTy);
-
- llvm::DIType* GetDebugStructType(llvm::Type* pType)
- {
- llvm::StructType* pStructTy = llvm::cast<llvm::StructType>(pType);
- if (mDebugStructMap.find(pStructTy) == mDebugStructMap.end())
- {
- return nullptr;
- }
- return mDebugStructMap[pStructTy];
- }
-
- llvm::DIType*
- CreateDebugStructType(llvm::StructType* pType,
- const std::string& name,
- llvm::DIFile* pFile,
- uint32_t lineNum,
- const std::vector<std::pair<std::string, uint32_t>>& members);
-};
-
-class InterleaveAssemblyAnnotater : public llvm::AssemblyAnnotationWriter
-{
-public:
- void emitInstructionAnnot(const llvm::Instruction* pInst,
- llvm::formatted_raw_ostream& OS) override;
- std::vector<std::string> mAssembly;
-
-private:
- uint32_t mCurrentLineNo = 0;
-};
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
deleted file mode 100644
index 80959809806..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
+++ /dev/null
@@ -1,924 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file blend_jit.cpp
- *
- * @brief Implementation of the blend jitter
- *
- * Notes:
- *
- ******************************************************************************/
-#include "jit_pch.hpp"
-#include "builder.h"
-#include "jit_api.h"
-#include "blend_jit.h"
-#include "gen_state_llvm.h"
-#include "functionpasses/passes.h"
-
-#include "util/compiler.h"
-
-// components with bit-widths <= the QUANTIZE_THRESHOLD will be quantized
-#define QUANTIZE_THRESHOLD 2
-
-using namespace llvm;
-using namespace SwrJit;
-
-//////////////////////////////////////////////////////////////////////////
-/// Interface to Jitting a blend shader
-//////////////////////////////////////////////////////////////////////////
-struct BlendJit : public Builder
-{
- BlendJit(JitManager* pJitMgr) : Builder(pJitMgr){}; // BlendJit declares no members of its own; this only forwards the JitManager to the Builder base
-
- template <bool Color, bool Alpha> // Color gates the write of result[0..2], Alpha gates the write of result[3]
- void GenerateBlendFactor(SWR_BLEND_FACTOR factor, // emits IR for one blend factor; inputs are per-channel SIMD values indexed 0..3
- Value* constColor[4],
- Value* src[4],
- Value* src1[4],
- Value* dst[4],
- Value* result[4])
- {
- Value* out[4]; // every case fills all four channels; the template flags only select what is copied out at the end
-
- switch (factor)
- {
- case BLENDFACTOR_ONE:
- out[0] = out[1] = out[2] = out[3] = VIMMED1(1.0f);
- break;
- case BLENDFACTOR_SRC_COLOR:
- out[0] = src[0];
- out[1] = src[1];
- out[2] = src[2];
- out[3] = src[3];
- break;
- case BLENDFACTOR_SRC_ALPHA:
- out[0] = out[1] = out[2] = out[3] = src[3]; // channel 3 (alpha) replicated to all channels
- break;
- case BLENDFACTOR_DST_ALPHA:
- out[0] = out[1] = out[2] = out[3] = dst[3];
- break;
- case BLENDFACTOR_DST_COLOR:
- out[0] = dst[0];
- out[1] = dst[1];
- out[2] = dst[2];
- out[3] = dst[3];
- break;
- case BLENDFACTOR_SRC_ALPHA_SATURATE:
- out[0] = out[1] = out[2] = VMINPS(src[3], FSUB(VIMMED1(1.0f), dst[3])); // rgb factor = min(src.a, 1 - dst.a)
- out[3] = VIMMED1(1.0f); // alpha factor is fixed at 1
- break;
- case BLENDFACTOR_CONST_COLOR:
- out[0] = constColor[0];
- out[1] = constColor[1];
- out[2] = constColor[2];
- out[3] = constColor[3];
- break;
- case BLENDFACTOR_CONST_ALPHA:
- out[0] = out[1] = out[2] = out[3] = constColor[3];
- break;
- case BLENDFACTOR_SRC1_COLOR:
- out[0] = src1[0]; // src1 is the second source colour loaded from the blend context
- out[1] = src1[1];
- out[2] = src1[2];
- out[3] = src1[3];
- break;
- case BLENDFACTOR_SRC1_ALPHA:
- out[0] = out[1] = out[2] = out[3] = src1[3];
- break;
- case BLENDFACTOR_ZERO:
- out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
- break;
- case BLENDFACTOR_INV_SRC_COLOR: // every INV_* factor below is (1 - x) of its non-inverted counterpart
- out[0] = FSUB(VIMMED1(1.0f), src[0]);
- out[1] = FSUB(VIMMED1(1.0f), src[1]);
- out[2] = FSUB(VIMMED1(1.0f), src[2]);
- out[3] = FSUB(VIMMED1(1.0f), src[3]);
- break;
- case BLENDFACTOR_INV_SRC_ALPHA:
- out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src[3]);
- break;
- case BLENDFACTOR_INV_DST_ALPHA:
- out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), dst[3]);
- break;
- case BLENDFACTOR_INV_DST_COLOR:
- out[0] = FSUB(VIMMED1(1.0f), dst[0]);
- out[1] = FSUB(VIMMED1(1.0f), dst[1]);
- out[2] = FSUB(VIMMED1(1.0f), dst[2]);
- out[3] = FSUB(VIMMED1(1.0f), dst[3]);
- break;
- case BLENDFACTOR_INV_CONST_COLOR:
- out[0] = FSUB(VIMMED1(1.0f), constColor[0]);
- out[1] = FSUB(VIMMED1(1.0f), constColor[1]);
- out[2] = FSUB(VIMMED1(1.0f), constColor[2]);
- out[3] = FSUB(VIMMED1(1.0f), constColor[3]);
- break;
- case BLENDFACTOR_INV_CONST_ALPHA:
- out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), constColor[3]);
- break;
- case BLENDFACTOR_INV_SRC1_COLOR:
- out[0] = FSUB(VIMMED1(1.0f), src1[0]);
- out[1] = FSUB(VIMMED1(1.0f), src1[1]);
- out[2] = FSUB(VIMMED1(1.0f), src1[2]);
- out[3] = FSUB(VIMMED1(1.0f), src1[3]);
- break;
- case BLENDFACTOR_INV_SRC1_ALPHA:
- out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src1[3]);
- break;
- default:
- SWR_INVALID("Unsupported blend factor: %d", factor);
- out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f); // keeps out[] initialised for the copy-out below
- break;
- }
-
- if (Color) // compile-time constant from the template argument
- {
- result[0] = out[0];
- result[1] = out[1];
- result[2] = out[2];
- }
-
- if (Alpha) // compile-time constant from the template argument
- {
- result[3] = out[3];
- }
- }
-
- void Clamp(SWR_FORMAT format, Value* src[4]) // clamps all four channels of src in place according to the format's numeric type
- {
- const SWR_FORMAT_INFO& info = GetFormatInfo(format);
- SWR_TYPE type = info.type[0]; // only component 0's type is consulted; it decides the range for all four channels
-
- switch (type)
- {
- default: // any type other than UNORM/SNORM/UNKNOWN is left unclamped
- break;
-
- case SWR_TYPE_UNORM: // clamp to [0, 1]
- src[0] = VMINPS(VMAXPS(src[0], VIMMED1(0.0f)), VIMMED1(1.0f));
- src[1] = VMINPS(VMAXPS(src[1], VIMMED1(0.0f)), VIMMED1(1.0f));
- src[2] = VMINPS(VMAXPS(src[2], VIMMED1(0.0f)), VIMMED1(1.0f));
- src[3] = VMINPS(VMAXPS(src[3], VIMMED1(0.0f)), VIMMED1(1.0f));
- break;
-
- case SWR_TYPE_SNORM: // clamp to [-1, 1]
- src[0] = VMINPS(VMAXPS(src[0], VIMMED1(-1.0f)), VIMMED1(1.0f));
- src[1] = VMINPS(VMAXPS(src[1], VIMMED1(-1.0f)), VIMMED1(1.0f));
- src[2] = VMINPS(VMAXPS(src[2], VIMMED1(-1.0f)), VIMMED1(1.0f));
- src[3] = VMINPS(VMAXPS(src[3], VIMMED1(-1.0f)), VIMMED1(1.0f));
- break;
-
- case SWR_TYPE_UNKNOWN:
- SWR_INVALID("Unsupported format type: %d", type); // last case, so the missing break is harmless
- }
- }
-
- void ApplyDefaults(SWR_FORMAT format, Value* src[4]) // overwrites channels the format does not provide with the format's default values
- {
- const SWR_FORMAT_INFO& info = GetFormatInfo(format);
-
- bool valid[] = {false, false, false, false};
- for (uint32_t c = 0; c < info.numComps; ++c)
- {
- valid[info.swizzle[c]] = true; // a channel is valid when some format component swizzles onto it
- }
-
- for (uint32_t c = 0; c < 4; ++c)
- {
- if (!valid[c])
- {
- src[c] = BITCAST(VIMMED1((int)info.defaults[c]), mSimdFP32Ty); // integer immediate is bit-cast (not numerically converted) to the SIMD float type
- }
- }
- }
-
- void ApplyUnusedDefaults(SWR_FORMAT format, Value* src[4]) // forces the default value into channels whose format component is SWR_TYPE_UNUSED
- {
- const SWR_FORMAT_INFO& info = GetFormatInfo(format);
-
- for (uint32_t c = 0; c < info.numComps; ++c)
- {
- if (info.type[c] == SWR_TYPE_UNUSED)
- {
- src[info.swizzle[c]] = // both the target channel and the default are indexed by the swizzled position
- BITCAST(VIMMED1((int)info.defaults[info.swizzle[c]]), mSimdFP32Ty);
- }
- }
- }
-
- void Quantize(SWR_FORMAT format, Value* src[4]) // snaps components of at most QUANTIZE_THRESHOLD bits to values representable at that bit depth
- {
- const SWR_FORMAT_INFO& info = GetFormatInfo(format);
- for (uint32_t c = 0; c < info.numComps; ++c)
- {
- if (info.bpc[c] <= QUANTIZE_THRESHOLD && info.type[c] != SWR_TYPE_UNUSED)
- {
- uint32_t swizComp = info.swizzle[c];
- float factor = (float)((1 << info.bpc[c]) - 1); // largest integer value of a bpc-bit channel
- switch (info.type[c])
- {
- case SWR_TYPE_UNORM:
- src[swizComp] = FADD(FMUL(src[swizComp], VIMMED1(factor)), VIMMED1(0.5f)); // scale to integer range and bias by 0.5 ...
- src[swizComp] = VROUND(src[swizComp], C(_MM_FROUND_TO_ZERO)); // ... then truncate toward zero
- src[swizComp] = FMUL(src[swizComp], VIMMED1(1.0f / factor)); // scale back by 1/factor
- break;
- default:
- SWR_INVALID("Unsupported format type: %d", info.type[c]); // only UNORM low-precision components are handled
- }
- }
- }
- }
-
- template <bool Color, bool Alpha> // Color gates the write of result[0..2], Alpha gates the write of result[3]
- void BlendFunc(SWR_BLEND_OP blendOp, // emits IR combining src and dst with the given blend operation
- Value* src[4],
- Value* srcFactor[4],
- Value* dst[4],
- Value* dstFactor[4],
- Value* result[4])
- {
- Value* out[4];
- Value* srcBlend[4];
- Value* dstBlend[4];
- for (uint32_t i = 0; i < 4; ++i)
- {
- srcBlend[i] = FMUL(src[i], srcFactor[i]); // reads all four factor channels, so callers must have filled every entry
- dstBlend[i] = FMUL(dst[i], dstFactor[i]);
- }
-
- switch (blendOp)
- {
- case BLENDOP_ADD: // src*sf + dst*df
- out[0] = FADD(srcBlend[0], dstBlend[0]);
- out[1] = FADD(srcBlend[1], dstBlend[1]);
- out[2] = FADD(srcBlend[2], dstBlend[2]);
- out[3] = FADD(srcBlend[3], dstBlend[3]);
- break;
-
- case BLENDOP_SUBTRACT: // src*sf - dst*df
- out[0] = FSUB(srcBlend[0], dstBlend[0]);
- out[1] = FSUB(srcBlend[1], dstBlend[1]);
- out[2] = FSUB(srcBlend[2], dstBlend[2]);
- out[3] = FSUB(srcBlend[3], dstBlend[3]);
- break;
-
- case BLENDOP_REVSUBTRACT: // dst*df - src*sf
- out[0] = FSUB(dstBlend[0], srcBlend[0]);
- out[1] = FSUB(dstBlend[1], srcBlend[1]);
- out[2] = FSUB(dstBlend[2], srcBlend[2]);
- out[3] = FSUB(dstBlend[3], srcBlend[3]);
- break;
-
- case BLENDOP_MIN: // operates on the unscaled src/dst; the blend factors are ignored
- out[0] = VMINPS(src[0], dst[0]);
- out[1] = VMINPS(src[1], dst[1]);
- out[2] = VMINPS(src[2], dst[2]);
- out[3] = VMINPS(src[3], dst[3]);
- break;
-
- case BLENDOP_MAX: // operates on the unscaled src/dst; the blend factors are ignored
- out[0] = VMAXPS(src[0], dst[0]);
- out[1] = VMAXPS(src[1], dst[1]);
- out[2] = VMAXPS(src[2], dst[2]);
- out[3] = VMAXPS(src[3], dst[3]);
- break;
-
- default:
- SWR_INVALID("Unsupported blend operation: %d", blendOp);
- out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f); // keeps out[] initialised for the copy-out below
- break;
- }
-
- if (Color) // compile-time constant from the template argument
- {
- result[0] = out[0];
- result[1] = out[1];
- result[2] = out[2];
- }
-
- if (Alpha) // compile-time constant from the template argument
- {
- result[3] = out[3];
- }
- }
-
- void LogicOpFunc(SWR_LOGIC_OP logicOp, Value* src[4], Value* dst[4], Value* result[4]) // emits the bitwise logic op per channel; Create() converts src/dst to integer vectors before calling
- {
- // Op: (s == PS output, d = RT contents)
- switch (logicOp)
- {
- case LOGICOP_CLEAR:
- result[0] = VIMMED1(0);
- result[1] = VIMMED1(0);
- result[2] = VIMMED1(0);
- result[3] = VIMMED1(0);
- break;
-
- case LOGICOP_NOR:
- // ~(s | d)
- result[0] = XOR(OR(src[0], dst[0]), VIMMED1(0xFFFFFFFF)); // bitwise NOT is spelled as XOR with all-ones throughout this function
- result[1] = XOR(OR(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
- result[2] = XOR(OR(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
- result[3] = XOR(OR(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
- break;
-
- case LOGICOP_AND_INVERTED:
- // ~s & d
- // todo: use avx andnot instr when I can find the intrinsic to call
- result[0] = AND(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]);
- result[1] = AND(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]);
- result[2] = AND(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]);
- result[3] = AND(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]);
- break;
-
- case LOGICOP_COPY_INVERTED:
- // ~s
- result[0] = XOR(src[0], VIMMED1(0xFFFFFFFF));
- result[1] = XOR(src[1], VIMMED1(0xFFFFFFFF));
- result[2] = XOR(src[2], VIMMED1(0xFFFFFFFF));
- result[3] = XOR(src[3], VIMMED1(0xFFFFFFFF));
- break;
-
- case LOGICOP_AND_REVERSE:
- // s & ~d
- // todo: use avx andnot instr when I can find the intrinsic to call
- result[0] = AND(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]);
- result[1] = AND(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]);
- result[2] = AND(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]);
- result[3] = AND(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]);
- break;
-
- case LOGICOP_INVERT:
- // ~d
- result[0] = XOR(dst[0], VIMMED1(0xFFFFFFFF));
- result[1] = XOR(dst[1], VIMMED1(0xFFFFFFFF));
- result[2] = XOR(dst[2], VIMMED1(0xFFFFFFFF));
- result[3] = XOR(dst[3], VIMMED1(0xFFFFFFFF));
- break;
-
- case LOGICOP_XOR:
- // s ^ d
- result[0] = XOR(src[0], dst[0]);
- result[1] = XOR(src[1], dst[1]);
- result[2] = XOR(src[2], dst[2]);
- result[3] = XOR(src[3], dst[3]);
- break;
-
- case LOGICOP_NAND:
- // ~(s & d)
- result[0] = XOR(AND(src[0], dst[0]), VIMMED1(0xFFFFFFFF));
- result[1] = XOR(AND(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
- result[2] = XOR(AND(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
- result[3] = XOR(AND(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
- break;
-
- case LOGICOP_AND:
- // s & d
- result[0] = AND(src[0], dst[0]);
- result[1] = AND(src[1], dst[1]);
- result[2] = AND(src[2], dst[2]);
- result[3] = AND(src[3], dst[3]);
- break;
-
- case LOGICOP_EQUIV:
- // ~(s ^ d)
- result[0] = XOR(XOR(src[0], dst[0]), VIMMED1(0xFFFFFFFF));
- result[1] = XOR(XOR(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
- result[2] = XOR(XOR(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
- result[3] = XOR(XOR(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
- break;
-
- case LOGICOP_NOOP: // d: render target contents pass through unchanged
- result[0] = dst[0];
- result[1] = dst[1];
- result[2] = dst[2];
- result[3] = dst[3];
- break;
-
- case LOGICOP_OR_INVERTED:
- // ~s | d
- result[0] = OR(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]);
- result[1] = OR(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]);
- result[2] = OR(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]);
- result[3] = OR(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]);
- break;
-
- case LOGICOP_COPY: // s: pixel shader output passes through unchanged
- result[0] = src[0];
- result[1] = src[1];
- result[2] = src[2];
- result[3] = src[3];
- break;
-
- case LOGICOP_OR_REVERSE:
- // s | ~d
- result[0] = OR(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]);
- result[1] = OR(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]);
- result[2] = OR(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]);
- result[3] = OR(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]);
- break;
-
- case LOGICOP_OR:
- // s | d
- result[0] = OR(src[0], dst[0]);
- result[1] = OR(src[1], dst[1]);
- result[2] = OR(src[2], dst[2]);
- result[3] = OR(src[3], dst[3]);
- break;
-
- case LOGICOP_SET:
- result[0] = VIMMED1(0xFFFFFFFF);
- result[1] = VIMMED1(0xFFFFFFFF);
- result[2] = VIMMED1(0xFFFFFFFF);
- result[3] = VIMMED1(0xFFFFFFFF);
- break;
-
- default:
- SWR_INVALID("Unsupported logic operation: %d", logicOp);
- result[0] = result[1] = result[2] = result[3] = VIMMED1(0.0f); // NOTE(review): float immediate here, while every other case uses integer immediates/operands and Create() ANDs the result with an integer mask - VIMMED1(0) looks intended
- break;
- }
- }
-
- void
- AlphaTest(const BLEND_COMPILE_STATE& state, Value* pBlendState, Value* ppAlpha, Value* ppMask) // emits the alpha test and ANDs its per-lane result into the mask stored at ppMask
- {
- // load uint32_t reference
- Value* pRef = VBROADCAST(LOAD(pBlendState, {0, SWR_BLEND_STATE_alphaTestReference}));
-
- // load alpha
- Value* pAlpha = LOAD(ppAlpha, {0, 0});
-
- Value* pTest = nullptr; // NOTE(review): stays nullptr if a default case below is hit, yet is still passed to AND() afterwards
- if (state.alphaTestFormat == ALPHA_TEST_UNORM8)
- {
- // convert float alpha to unorm8
- Value* pAlphaU8 = FMUL(pAlpha, VIMMED1(256.0f)); // NOTE(review): scales by 256 rather than 255, so alpha 1.0 becomes 256 - confirm this matches how the reference value is encoded
- pAlphaU8 = FP_TO_UI(pAlphaU8, mSimdInt32Ty);
-
- // compare
- switch (state.alphaTestFunction) // integer path: all ordered comparisons are unsigned
- {
- case ZFUNC_ALWAYS:
- pTest = VIMMED1(true);
- break;
- case ZFUNC_NEVER:
- pTest = VIMMED1(false);
- break;
- case ZFUNC_LT:
- pTest = ICMP_ULT(pAlphaU8, pRef);
- break;
- case ZFUNC_EQ:
- pTest = ICMP_EQ(pAlphaU8, pRef);
- break;
- case ZFUNC_LE:
- pTest = ICMP_ULE(pAlphaU8, pRef);
- break;
- case ZFUNC_GT:
- pTest = ICMP_UGT(pAlphaU8, pRef);
- break;
- case ZFUNC_NE:
- pTest = ICMP_NE(pAlphaU8, pRef);
- break;
- case ZFUNC_GE:
- pTest = ICMP_UGE(pAlphaU8, pRef);
- break;
- default:
- SWR_INVALID("Invalid alpha test function");
- break;
- }
- }
- else
- {
- // cast ref to float
- pRef = BITCAST(pRef, mSimdFP32Ty); // the 32-bit reference is reinterpreted as float bits, not converted
-
- // compare
- switch (state.alphaTestFunction) // float path: ordered comparisons (FCMP_O*)
- {
- case ZFUNC_ALWAYS:
- pTest = VIMMED1(true);
- break;
- case ZFUNC_NEVER:
- pTest = VIMMED1(false);
- break;
- case ZFUNC_LT:
- pTest = FCMP_OLT(pAlpha, pRef);
- break;
- case ZFUNC_EQ:
- pTest = FCMP_OEQ(pAlpha, pRef);
- break;
- case ZFUNC_LE:
- pTest = FCMP_OLE(pAlpha, pRef);
- break;
- case ZFUNC_GT:
- pTest = FCMP_OGT(pAlpha, pRef);
- break;
- case ZFUNC_NE:
- pTest = FCMP_ONE(pAlpha, pRef);
- break;
- case ZFUNC_GE:
- pTest = FCMP_OGE(pAlpha, pRef);
- break;
- default:
- SWR_INVALID("Invalid alpha test function");
- break;
- }
- }
-
- // load current mask
- Value* pMask = LOAD(ppMask);
-
- // convert to int1 mask
- pMask = MASK(pMask);
-
- // and with alpha test result
- pMask = AND(pMask, pTest);
-
- // convert back to vector mask
- pMask = VMASK(pMask);
-
- // store new mask
- STORE(pMask, ppMask);
- }
-
- Function* Create(const BLEND_COMPILE_STATE& state) // builds the LLVM IR for one blend function specialised to `state`, runs the pass pipeline on it and returns it
- {
- std::stringstream fnName("BLND_",
- std::ios_base::in | std::ios_base::out | std::ios_base::ate); // ate: the << below appends after the prefix
- fnName << ComputeCRC(0, &state, sizeof(state)); // function name is keyed on the raw bytes of the compile state
-
- // blend function signature
- // typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_CONTEXT*);
-
- std::vector<Type*> args{
- PointerType::get(Gen_SWR_BLEND_CONTEXT(JM()), 0) // SWR_BLEND_CONTEXT*
- };
-
- // std::vector<Type*> args{
- // PointerType::get(Gen_SWR_BLEND_CONTEXT(JM()), 0), // SWR_BLEND_CONTEXT*
- //};
-
- FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
- Function* blendFunc = Function::Create(
- fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
- blendFunc->getParent()->setModuleIdentifier(blendFunc->getName()); // the module takes the function's name as its identifier
-
- BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", blendFunc);
-
- IRB()->SetInsertPoint(entry);
-
- // arguments
- auto argitr = blendFunc->arg_begin();
- Value* pBlendContext = &*argitr++; // the single SWR_BLEND_CONTEXT* argument; everything else is loaded from it
- pBlendContext->setName("pBlendContext");
- Value* pBlendState = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pBlendState});
- pBlendState->setName("pBlendState");
- Value* pSrc = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src});
- pSrc->setName("src");
- Value* pSrc1 = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src1});
- pSrc1->setName("src1");
- Value* pSrc0Alpha = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src0alpha});
- pSrc0Alpha->setName("src0alpha");
- Value* sampleNum = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_sampleNum});
- sampleNum->setName("sampleNum");
- Value* pDst = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pDst});
- pDst->setName("pDst");
- Value* pResult = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_result});
- pResult->setName("result");
- Value* ppoMask = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_oMask});
- ppoMask->setName("ppoMask");
- Value* ppMask = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pMask});
- ppMask->setName("pMask");
-
- static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT,
- "Unsupported hot tile format"); // the per-channel loads from pDst below rely on this hot tile format
- Value* dst[4];
- Value* constantColor[4];
- Value* src[4];
- Value* src1[4];
- Value* result[4];
- for (uint32_t i = 0; i < 4; ++i)
- {
- // load hot tile
- dst[i] = LOAD(pDst, {0, i});
-
- // load constant color
- constantColor[i] = VBROADCAST(LOAD(pBlendState, {0, SWR_BLEND_STATE_constantColor, i}));
-
- // load src
- src[i] = LOAD(pSrc, {0, i});
-
- // load src1
- src1[i] = LOAD(pSrc1, {0, i});
- }
- Value* currentSampleMask = VIMMED1(-1); // all bits set unless alpha-to-coverage narrows it below
- if (state.desc.alphaToCoverageEnable)
- {
- Value* pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f);
- uint32_t bits = (1 << state.desc.numSamples) - 1; // one bit per sample
- currentSampleMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits)));
- currentSampleMask = FP_TO_SI(FADD(currentSampleMask, VIMMED1(0.5f)), mSimdInt32Ty); // integer value of clamp(alpha) * bits + 0.5
- }
-
- // alpha test
- if (state.desc.alphaTestEnable)
- {
- // Gather for archrast stats
- STORE(C(1), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaTested});
- AlphaTest(state, pBlendState, pSrc0Alpha, ppMask); // updates the mask stored at ppMask
- }
- else
- {
- // Gather for archrast stats
- STORE(C(0), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaTested});
- }
-
- // color blend
- if (state.blendState.blendEnable)
- {
- // Gather for archrast stats
- STORE(C(1), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaBlended});
-
- // clamp sources
- Clamp(state.format, src);
- Clamp(state.format, src1);
- Clamp(state.format, dst);
- Clamp(state.format, constantColor);
-
- // apply defaults to hottile contents to take into account missing components
- ApplyDefaults(state.format, dst);
-
- // Force defaults for unused 'X' components
- ApplyUnusedDefaults(state.format, dst);
-
- // Quantize low precision components
- Quantize(state.format, dst);
-
- // special case clamping for R11G11B10_float which has no sign bit
- if (state.format == R11G11B10_FLOAT)
- {
- dst[0] = VMAXPS(dst[0], VIMMED1(0.0f));
- dst[1] = VMAXPS(dst[1], VIMMED1(0.0f));
- dst[2] = VMAXPS(dst[2], VIMMED1(0.0f));
- dst[3] = VMAXPS(dst[3], VIMMED1(0.0f));
- }
-
- Value* srcFactor[4];
- Value* dstFactor[4];
- if (state.desc.independentAlphaBlendEnable)
- {
- GenerateBlendFactor<true, false>( // rgb factor channels come from the colour factor ...
- state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
- GenerateBlendFactor<false, true>(state.blendState.sourceAlphaBlendFactor, // ... and channel 3 from the separate alpha factor
- constantColor,
- src,
- src1,
- dst,
- srcFactor);
-
- GenerateBlendFactor<true, false>(
- state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
- GenerateBlendFactor<false, true>(state.blendState.destAlphaBlendFactor,
- constantColor,
- src,
- src1,
- dst,
- dstFactor);
-
- BlendFunc<true, false>(
- state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
- BlendFunc<false, true>(
- state.blendState.alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
- }
- else
- {
- GenerateBlendFactor<true, true>( // the colour factor and colour op drive all four channels
- state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
- GenerateBlendFactor<true, true>(
- state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
-
- BlendFunc<true, true>(
- state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
- }
-
- // store results out
- for (uint32_t i = 0; i < 4; ++i)
- {
- STORE(result[i], pResult, {0, i});
- }
- }
- else
- {
- // Gather for archrast stats
- STORE(C(0), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaBlended});
- }
-
- if (state.blendState.logicOpEnable) // operates on src/dst, not on the blended result; its stores overwrite the blend stores for used channels
- {
- const SWR_FORMAT_INFO& info = GetFormatInfo(state.format);
- Value* vMask[4]; // entries for UNUSED channels stay unset; both loops below skip those channels
- float scale[4];
-
- if (!state.blendState.blendEnable) // the blend branch above already clamped src and dst
- {
- Clamp(state.format, src);
- Clamp(state.format, dst);
- }
-
- for (uint32_t i = 0; i < 4; i++)
- {
- if (info.type[i] == SWR_TYPE_UNUSED)
- {
- continue;
- }
-
- if (info.bpc[i] >= 32)
- {
- vMask[i] = VIMMED1(0xFFFFFFFF);
- scale[i] = 0xFFFFFFFF; // stored as float, which cannot represent 2^32 - 1 exactly
- }
- else
- {
- vMask[i] = VIMMED1((1 << info.bpc[i]) - 1); // low bpc bits set
- if (info.type[i] == SWR_TYPE_SNORM)
- scale[i] = (1 << (info.bpc[i] - 1)) - 1; // one bit is reserved for the sign
- else
- scale[i] = (1 << info.bpc[i]) - 1;
- }
-
- switch (info.type[i]) // convert float channel values to the integer form the logic op works on
- {
- default:
- SWR_INVALID("Unsupported type for logic op: %d", info.type[i]);
- break;
-
- case SWR_TYPE_UNKNOWN:
- case SWR_TYPE_UNUSED: // not reachable in practice: UNUSED is skipped by the continue above
- FALLTHROUGH;
-
- case SWR_TYPE_UINT:
- case SWR_TYPE_SINT:
- src[i] = BITCAST(src[i], mSimdInt32Ty); // reinterpret the bits as integers, no numeric conversion
- dst[i] = BITCAST(dst[i], mSimdInt32Ty);
- break;
- case SWR_TYPE_SNORM:
- src[i] = FP_TO_SI(FMUL(src[i], VIMMED1(scale[i])), mSimdInt32Ty);
- dst[i] = FP_TO_SI(FMUL(dst[i], VIMMED1(scale[i])), mSimdInt32Ty);
- break;
- case SWR_TYPE_UNORM:
- src[i] = FP_TO_UI(FMUL(src[i], VIMMED1(scale[i])), mSimdInt32Ty);
- dst[i] = FP_TO_UI(FMUL(dst[i], VIMMED1(scale[i])), mSimdInt32Ty);
- break;
- }
- }
-
- LogicOpFunc(state.blendState.logicOpFunc, src, dst, result); // runs on all four channels, including any skipped above; those results are simply not stored
-
- // store results out
- for (uint32_t i = 0; i < 4; ++i)
- {
- if (info.type[i] == SWR_TYPE_UNUSED)
- {
- continue;
- }
-
- // clear upper bits from PS output not in RT format after doing logic op
- result[i] = AND(result[i], vMask[i]);
-
- switch (info.type[i]) // convert the integer result back to the float form stored in pResult
- {
- default:
- SWR_INVALID("Unsupported type for logic op: %d", info.type[i]);
- break;
-
- case SWR_TYPE_UNKNOWN:
- case SWR_TYPE_UNUSED:
- FALLTHROUGH;
-
- case SWR_TYPE_UINT:
- case SWR_TYPE_SINT:
- result[i] = BITCAST(result[i], mSimdFP32Ty);
- break;
- case SWR_TYPE_SNORM:
- result[i] = SHL(result[i], C(32 - info.bpc[i])); // shift left then arithmetic-shift right ...
- result[i] = ASHR(result[i], C(32 - info.bpc[i])); // ... to sign-extend the bpc-bit value to 32 bits
- result[i] = FMUL(SI_TO_FP(result[i], mSimdFP32Ty), VIMMED1(1.0f / scale[i]));
- break;
- case SWR_TYPE_UNORM:
- result[i] = FMUL(UI_TO_FP(result[i], mSimdFP32Ty), VIMMED1(1.0f / scale[i]));
- break;
- }
-
- STORE(result[i], pResult, {0, i});
- }
- }
-
- if (state.desc.oMaskEnable)
- {
- assert(!(state.desc.alphaToCoverageEnable)); // oMask and alpha-to-coverage are not expected together
- // load current mask
- Value* oMask = LOAD(ppoMask);
- currentSampleMask = AND(oMask, currentSampleMask);
- }
-
- if (state.desc.sampleMaskEnable)
- {
- Value* sampleMask = LOAD(pBlendState, {0, SWR_BLEND_STATE_sampleMask});
- currentSampleMask = AND(VBROADCAST(sampleMask), currentSampleMask);
- }
-
- if (state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable ||
- state.desc.oMaskEnable)
- {
- // load coverage mask and mask off any lanes with no samples
- Value* pMask = LOAD(ppMask);
- Value* sampleMasked = SHL(C(1), sampleNum); // single bit selecting the sample being processed
- currentSampleMask = AND(currentSampleMask, VBROADCAST(sampleMasked));
- currentSampleMask = S_EXT(ICMP_UGT(currentSampleMask, VBROADCAST(C(0))), mSimdInt32Ty); // lanes with the bit set become all-ones, others zero
- Value* outputMask = AND(pMask, currentSampleMask);
- // store new mask
- STORE(outputMask, GEP(ppMask, C(0)));
- }
-
- RET_VOID();
-
- JitManager::DumpToFile(blendFunc, ""); // dump of the IR before the passes run
-
- ::FunctionPassManager passes(JM()->mpCurrentModule);
-
- passes.add(createBreakCriticalEdgesPass());
- passes.add(createCFGSimplificationPass());
- passes.add(createEarlyCSEPass());
- passes.add(createPromoteMemoryToRegisterPass());
- passes.add(createCFGSimplificationPass());
- passes.add(createEarlyCSEPass());
- passes.add(createInstructionCombiningPass());
-#if LLVM_VERSION_MAJOR <= 11
- passes.add(createConstantPropagationPass()); // only added when building against LLVM 11 or older
-#endif
- passes.add(createSCCPPass());
- passes.add(createAggressiveDCEPass());
-
- passes.add(createLowerX86Pass(this)); // presumably the project pass from functionpasses/passes.h - confirm
-
- passes.run(*blendFunc);
-
- JitManager::DumpToFile(blendFunc, "optimized"); // dump of the IR after the passes
-
- return blendFunc;
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief JITs from blend shader IR
-/// @param hJitMgr - JitManager handle
-/// @param hFunc - handle to the LLVM function IR
-/// @return PFN_BLEND_JIT_FUNC - pointer to blend code
-PFN_BLEND_JIT_FUNC JitBlendFunc(HANDLE hJitMgr, const HANDLE hFunc)
-{
- const llvm::Function* func = (const llvm::Function*)hFunc;
- JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
- PFN_BLEND_JIT_FUNC pfnBlend;
- pfnBlend = (PFN_BLEND_JIT_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str())); // the address is looked up by function name
- // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot
- // add new IR to the module
- pJitMgr->mIsModuleFinalized = true;
-
- return pfnBlend;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief JIT compiles a blend shader and returns its entry point (PFN_BLEND_JIT_FUNC)
-/// @param hJitMgr - JitManager handle
-/// @param state - blend state to build function from
-extern "C" PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitMgr,
- const BLEND_COMPILE_STATE& state)
-{
- JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
-
- pJitMgr->SetupNewModule(); // called before any IR is generated for this blend function
-
- BlendJit theJit(pJitMgr);
- HANDLE hFunc = theJit.Create(state); // builds and optimises the IR
-
- return JitBlendFunc(hJitMgr, hFunc); // resolves the IR function to a callable address
-}
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.h
deleted file mode 100644
index 3e78054eced..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file blend_jit.h
- *
- * @brief Definition of the blend jitter
- *
- * Notes:
- *
- ******************************************************************************/
-#pragma once
-
-#include "common/formats.h"
-#include "core/state.h"
-
-struct RENDER_TARGET_BLEND_COMPILE_STATE // blend settings that BLEND_COMPILE_STATE embeds as its blendState member
-{
- bool blendEnable; // enables the colour blend stage
- bool logicOpEnable; // enables the logic op stage
- SWR_BLEND_FACTOR sourceAlphaBlendFactor; // zeroed by Canonicalize() when independent alpha blending is off
- SWR_BLEND_FACTOR destAlphaBlendFactor; // zeroed by Canonicalize() when independent alpha blending is off
- SWR_BLEND_FACTOR sourceBlendFactor;
- SWR_BLEND_FACTOR destBlendFactor;
- SWR_BLEND_OP colorBlendFunc;
- SWR_BLEND_OP alphaBlendFunc; // zeroed by Canonicalize() when independent alpha blending is off
- SWR_LOGIC_OP logicOpFunc; // zeroed by Canonicalize() when logicOpEnable is false
-};
-
-enum ALPHA_TEST_FORMAT // stored in BLEND_COMPILE_STATE::alphaTestFormat
-{
- ALPHA_TEST_UNORM8, // value 0, which is also what Canonicalize() resets the format to
- ALPHA_TEST_FLOAT32
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// BLEND_DESC - feature flags packed into one 32-bit word
-//////////////////////////////////////////////////////////////////////////
-struct BLEND_DESC
-{
- union
- {
- struct // anonymous struct: the flags are accessed directly as members of BLEND_DESC
- {
- uint32_t alphaTestEnable : 1;
- uint32_t independentAlphaBlendEnable : 1;
- uint32_t alphaToCoverageEnable : 1;
- uint32_t oMaskEnable : 1;
- uint32_t inputCoverageEnable : 1;
- uint32_t sampleMaskEnable : 1;
- uint32_t numSamples : 5; // 5-bit field, so values 0..31
- uint32_t _reserved : 21; // pads the bit-fields to 32 bits in total
- };
- uint32_t bits; // all of the flags above viewed as a single word
- };
-};
-#define BLEND_ENABLE_MASK 0x3D // alphaTestEnable | a2c | oMaskEnable | inputCoverageEnable | sampleMaskEnable (bits 0 and 2-5 of BLEND_DESC::bits, assuming LSB-first bit-field allocation)
-//////////////////////////////////////////////////////////////////////////
-/// State required for blend jit
-//////////////////////////////////////////////////////////////////////////
-struct BLEND_COMPILE_STATE
-{
- SWR_FORMAT format; // format of render target being blended
- RENDER_TARGET_BLEND_COMPILE_STATE blendState;
- BLEND_DESC desc;
-
- SWR_ZFUNCTION alphaTestFunction;
- ALPHA_TEST_FORMAT alphaTestFormat;
-
- bool operator==(const BLEND_COMPILE_STATE& other) const
- {
- return memcmp(this, &other, sizeof(BLEND_COMPILE_STATE)) == 0; // NOTE(review): byte-wise compare also covers any padding bytes; presumably callers zero the struct before filling it - confirm
- }
-
- // Canonicalize state to reduce unnecessary JIT compiles
- void Canonicalize() // zeroes every field whose governing enable flag is off
- {
- if (!desc.alphaTestEnable)
- {
- alphaTestFormat = (ALPHA_TEST_FORMAT)0;
- alphaTestFunction = (SWR_ZFUNCTION)0;
- }
-
- if (!blendState.blendEnable)
- {
- blendState.sourceAlphaBlendFactor = (SWR_BLEND_FACTOR)0;
- blendState.destAlphaBlendFactor = (SWR_BLEND_FACTOR)0;
- blendState.sourceBlendFactor = (SWR_BLEND_FACTOR)0;
- blendState.destBlendFactor = (SWR_BLEND_FACTOR)0;
- blendState.colorBlendFunc = (SWR_BLEND_OP)0;
- blendState.alphaBlendFunc = (SWR_BLEND_OP)0;
- }
-
- if (!blendState.logicOpEnable)
- {
- blendState.logicOpFunc = (SWR_LOGIC_OP)0;
- }
-
- if (!blendState.blendEnable && !blendState.logicOpEnable) // format is cleared only when both stages are off
- {
- format = (SWR_FORMAT)0;
- }
-
- if (!desc.independentAlphaBlendEnable) // the separate alpha factors/op only matter with independent alpha blending
- {
- blendState.sourceAlphaBlendFactor = (SWR_BLEND_FACTOR)0;
- blendState.destAlphaBlendFactor = (SWR_BLEND_FACTOR)0;
- blendState.alphaBlendFunc = (SWR_BLEND_OP)0;
- }
- }
-};
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
deleted file mode 100644
index cd4b5f31ea3..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
+++ /dev/null
@@ -1,219 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file builder.cpp
- *
- * @brief Includes all the builder related functionality
- *
- * Notes:
- *
- ******************************************************************************/
-
-#include "jit_pch.hpp"
-#include "builder.h"
-
-namespace SwrJit
-{
- using namespace llvm;
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Constructor for Builder.
- /// @param pJitMgr - JitManager which contains modules, function passes, etc.
- Builder::Builder(JitManager* pJitMgr) : mpJitMgr(pJitMgr), mpPrivateContext(nullptr)
- {
- mVWidth = pJitMgr->mVWidth;
- mVWidth16 = 16;
-
- mpIRBuilder = &pJitMgr->mBuilder;
-
- // Built in types: scalar
-
- mVoidTy = Type::getVoidTy(pJitMgr->mContext);
- mFP16Ty = Type::getHalfTy(pJitMgr->mContext);
- mFP32Ty = Type::getFloatTy(pJitMgr->mContext);
- mFP32PtrTy = PointerType::get(mFP32Ty, 0);
- mDoubleTy = Type::getDoubleTy(pJitMgr->mContext);
- mInt1Ty = Type::getInt1Ty(pJitMgr->mContext);
- mInt8Ty = Type::getInt8Ty(pJitMgr->mContext);
- mInt16Ty = Type::getInt16Ty(pJitMgr->mContext);
- mInt32Ty = Type::getInt32Ty(pJitMgr->mContext);
- mInt64Ty = Type::getInt64Ty(pJitMgr->mContext);
- mInt8PtrTy = PointerType::get(mInt8Ty, 0);
- mInt16PtrTy = PointerType::get(mInt16Ty, 0);
- mInt32PtrTy = PointerType::get(mInt32Ty, 0);
- mInt64PtrTy = PointerType::get(mInt64Ty, 0);
- mHandleTy = mInt8PtrTy;
-
- mSimd4FP64Ty = getVectorType(mDoubleTy, 4);
-
- // Built in types: target simd
- SetTargetWidth(pJitMgr->mVWidth);
-
- // Built in types: simd16
-
- mSimd16Int1Ty = getVectorType(mInt1Ty, mVWidth16);
- mSimd16Int16Ty = getVectorType(mInt16Ty, mVWidth16);
- mSimd16Int32Ty = getVectorType(mInt32Ty, mVWidth16);
- mSimd16Int64Ty = getVectorType(mInt64Ty, mVWidth16);
- mSimd16FP16Ty = getVectorType(mFP16Ty, mVWidth16);
- mSimd16FP32Ty = getVectorType(mFP32Ty, mVWidth16);
- mSimd16VectorTy = ArrayType::get(mSimd16FP32Ty, 4);
- mSimd16VectorTRTy = ArrayType::get(mSimd16FP32Ty, 5);
-
- mSimd32Int8Ty = getVectorType(mInt8Ty, 32);
-
- if (sizeof(uint32_t*) == 4)
- {
- mIntPtrTy = mInt32Ty;
- mSimdIntPtrTy = mSimdInt32Ty;
- mSimd16IntPtrTy = mSimd16Int32Ty;
- }
- else
- {
- SWR_ASSERT(sizeof(uint32_t*) == 8);
-
- mIntPtrTy = mInt64Ty;
- mSimdIntPtrTy = mSimdInt64Ty;
- mSimd16IntPtrTy = mSimd16Int64Ty;
- }
- }
-
- void Builder::SetTargetWidth(uint32_t width)
- {
- mVWidth = width;
-
- mSimdInt1Ty = getVectorType(mInt1Ty, mVWidth);
- mSimdInt16Ty = getVectorType(mInt16Ty, mVWidth);
- mSimdInt32Ty = getVectorType(mInt32Ty, mVWidth);
- mSimdInt64Ty = getVectorType(mInt64Ty, mVWidth);
- mSimdFP16Ty = getVectorType(mFP16Ty, mVWidth);
- mSimdFP32Ty = getVectorType(mFP32Ty, mVWidth);
- mSimdVectorTy = ArrayType::get(mSimdFP32Ty, 4);
- mSimdVectorIntTy = ArrayType::get(mSimdInt32Ty, 4);
- mSimdVectorTRTy = ArrayType::get(mSimdFP32Ty, 5);
- mSimdVectorTRIntTy = ArrayType::get(mSimdInt32Ty, 5);
- }
-
- /// @brief Mark this alloca as temporary to avoid hoisting later on
- void Builder::SetTempAlloca(Value* inst)
- {
- AllocaInst* pAlloca = dyn_cast<AllocaInst>(inst);
- SWR_ASSERT(pAlloca, "Unexpected non-alloca instruction");
- MDNode* N = MDNode::get(JM()->mContext, MDString::get(JM()->mContext, "is_temp_alloca"));
- pAlloca->setMetadata("is_temp_alloca", N);
- }
-
- bool Builder::IsTempAlloca(Value* inst)
- {
- AllocaInst* pAlloca = dyn_cast<AllocaInst>(inst);
- SWR_ASSERT(pAlloca, "Unexpected non-alloca instruction");
-
- return (pAlloca->getMetadata("is_temp_alloca") != nullptr);
- }
-
- // Returns true if able to find a call instruction to mark
- bool Builder::SetNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName)
- {
- CallInst* pCallInstr = dyn_cast<CallInst>(inst);
- if (pCallInstr)
- {
- MDNode* N = MDNode::get(JM()->mContext, MDString::get(JM()->mContext, mdName));
- pCallInstr->setMetadata(mdName, N);
- return true;
- }
- else
- {
- // Follow use def chain back up
- for (Use& u : inst->operands())
- {
- Instruction* srcInst = dyn_cast<Instruction>(u.get());
- if (srcInst)
- {
- if (SetNamedMetaDataOnCallInstr(srcInst, mdName))
- {
- return true;
- }
- }
- }
- }
-
- return false;
- }
-
- bool Builder::HasNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName)
- {
- CallInst* pCallInstr = dyn_cast<CallInst>(inst);
-
- if (!pCallInstr)
- {
- return false;
- }
-
- return (pCallInstr->getMetadata(mdName) != nullptr);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Packetizes the type. Assumes SOA conversion.
- Type* Builder::GetVectorType(Type* pType)
- {
- if (pType->isVectorTy())
- {
- return pType;
- }
-
- // [N x float] should packetize to [N x <8 x float>]
- if (pType->isArrayTy())
- {
- uint32_t arraySize = pType->getArrayNumElements();
- Type* pArrayType = pType->getArrayElementType();
- Type* pVecArrayType = GetVectorType(pArrayType);
- Type* pVecType = ArrayType::get(pVecArrayType, arraySize);
- return pVecType;
- }
-
- // {float,int} should packetize to {<8 x float>, <8 x int>}
- if (pType->isAggregateType())
- {
- uint32_t numElems = pType->getStructNumElements();
- SmallVector<Type*, 8> vecTypes;
- for (uint32_t i = 0; i < numElems; ++i)
- {
- Type* pElemType = pType->getStructElementType(i);
- Type* pVecElemType = GetVectorType(pElemType);
- vecTypes.push_back(pVecElemType);
- }
- Type* pVecType = StructType::get(JM()->mContext, vecTypes);
- return pVecType;
- }
-
- // [N x float]* should packetize to [N x <8 x float>]*
- if (pType->isPointerTy() && pType->getPointerElementType()->isArrayTy())
- {
- return PointerType::get(GetVectorType(pType->getPointerElementType()),
- pType->getPointerAddressSpace());
- }
-
- // <ty> should packetize to <8 x <ty>>
- Type* vecType = getVectorType(pType, JM()->mVWidth);
- return vecType;
- }
-} // namespace SwrJit
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.h b/src/gallium/drivers/swr/rasterizer/jitter/builder.h
deleted file mode 100644
index 9f2c199464d..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file builder.h
- *
- * @brief Includes all the builder related functionality
- *
- * Notes:
- *
- ******************************************************************************/
-#pragma once
-
-#include "JitManager.h"
-#include "common/formats.h"
-
-namespace SwrJit
-{
- ///@todo Move this to a better place
- enum SHADER_STATS_COUNTER_TYPE
- {
- STATS_INST_EXECUTED = 0,
- STATS_SAMPLE_EXECUTED = 1,
- STATS_SAMPLE_L_EXECUTED = 2,
- STATS_SAMPLE_B_EXECUTED = 3,
- STATS_SAMPLE_C_EXECUTED = 4,
- STATS_SAMPLE_C_LZ_EXECUTED = 5,
- STATS_SAMPLE_C_D_EXECUTED = 6,
- STATS_LOD_EXECUTED = 7,
- STATS_GATHER4_EXECUTED = 8,
- STATS_GATHER4_C_EXECUTED = 9,
- STATS_GATHER4_C_PO_EXECUTED = 10,
- STATS_GATHER4_C_PO_C_EXECUTED = 11,
- STATS_LOAD_RAW_UAV = 12,
- STATS_LOAD_RAW_RESOURCE = 13,
- STATS_STORE_RAW_UAV = 14,
- STATS_STORE_TGSM = 15,
- STATS_DISCARD = 16,
- STATS_BARRIER = 17,
-
- // ------------------
- STATS_TOTAL_COUNTERS
- };
-
- using namespace llvm;
- struct Builder
- {
- Builder(JitManager* pJitMgr);
- virtual ~Builder() {}
-
- IRBuilder<>* IRB() { return mpIRBuilder; };
- JitManager* JM() { return mpJitMgr; }
-
- JitManager* mpJitMgr;
- IRBuilder<>* mpIRBuilder;
-
- uint32_t mVWidth; // vector width target simd
- uint32_t mVWidth16; // vector width simd16
-
- // Built in types: scalar
-
- Type* mVoidTy;
- Type* mHandleTy;
- Type* mInt1Ty;
- Type* mInt8Ty;
- Type* mInt16Ty;
- Type* mInt32Ty;
- Type* mInt64Ty;
- Type* mIntPtrTy;
- Type* mFP16Ty;
- Type* mFP32Ty;
- Type* mFP32PtrTy;
- Type* mDoubleTy;
- Type* mInt8PtrTy;
- Type* mInt16PtrTy;
- Type* mInt32PtrTy;
- Type* mInt64PtrTy;
-
- Type* mSimd4FP64Ty;
-
- // Built in types: target SIMD
-
- Type* mSimdFP16Ty;
- Type* mSimdFP32Ty;
- Type* mSimdInt1Ty;
- Type* mSimdInt16Ty;
- Type* mSimdInt32Ty;
- Type* mSimdInt64Ty;
- Type* mSimdIntPtrTy;
- Type* mSimdVectorTy;
- Type* mSimdVectorTRTy;
- Type* mSimdVectorIntTy;
- Type* mSimdVectorTRIntTy;
-
- // Built in types: simd16
-
- Type* mSimd16FP16Ty;
- Type* mSimd16FP32Ty;
- Type* mSimd16Int1Ty;
- Type* mSimd16Int16Ty;
- Type* mSimd16Int32Ty;
- Type* mSimd16Int64Ty;
- Type* mSimd16IntPtrTy;
- Type* mSimd16VectorTy;
- Type* mSimd16VectorTRTy;
-
- Type* mSimd32Int8Ty;
-
- void SetTargetWidth(uint32_t width);
- void SetTempAlloca(Value* inst);
- bool IsTempAlloca(Value* inst);
- bool SetNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName);
- bool HasNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName);
- Type* GetVectorType(Type* pType);
- void SetMetadata(StringRef s, uint32_t val)
- {
- llvm::NamedMDNode* metaData = mpJitMgr->mpCurrentModule->getOrInsertNamedMetadata(s);
- Constant* cval = mpIRBuilder->getInt32(val);
- llvm::MDNode* mdNode = llvm::MDNode::get(mpJitMgr->mpCurrentModule->getContext(),
- llvm::ConstantAsMetadata::get(cval));
- if (metaData->getNumOperands())
- {
- metaData->setOperand(0, mdNode);
- }
- else
- {
- metaData->addOperand(mdNode);
- }
- }
- uint32_t GetMetadata(StringRef s)
- {
- NamedMDNode* metaData = mpJitMgr->mpCurrentModule->getNamedMetadata(s);
- if (metaData)
- {
- MDNode* mdNode = metaData->getOperand(0);
- Metadata* val = mdNode->getOperand(0);
- return mdconst::dyn_extract<ConstantInt>(val)->getZExtValue();
- }
- else
- {
- return 0;
- }
- }
-
-#include "gen_builder.hpp"
-#include "gen_builder_meta.hpp"
-#include "gen_builder_intrin.hpp"
-#include "builder_misc.h"
-#include "builder_math.h"
-#include "builder_mem.h"
-
- void SetPrivateContext(Value* pPrivateContext)
- {
- mpPrivateContext = pPrivateContext;
- NotifyPrivateContextSet();
- }
- virtual void NotifyPrivateContextSet() {}
- inline Value* GetPrivateContext() { return mpPrivateContext; }
-
- private:
- Value* mpPrivateContext;
- };
-} // namespace SwrJit
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
deleted file mode 100644
index b67ffbfa7aa..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
+++ /dev/null
@@ -1,396 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file builder_gfx_mem.cpp
- *
- * @brief Definition of the gfx mem builder
- *
- * Notes:
- *
- ******************************************************************************/
-#include "jit_pch.hpp"
-#include "builder.h"
-#include "common/rdtsc_buckets.h"
-#include "builder_gfx_mem.h"
-
-namespace SwrJit
-{
- using namespace llvm;
-
- BuilderGfxMem::BuilderGfxMem(JitManager* pJitMgr) : Builder(pJitMgr)
- {
- mpTranslationFuncTy = nullptr;
- mpfnTranslateGfxAddressForRead = nullptr;
- mpfnTranslateGfxAddressForWrite = nullptr;
- mpfnTrackMemAccess = nullptr;
- mpParamSimDC = nullptr;
- mpWorkerData = nullptr;
-
- }
-
- void BuilderGfxMem::NotifyPrivateContextSet()
- {
- }
-
- void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage)
- {
- SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT::MEM_CLIENT_INTERNAL),
- "Internal memory should not be gfxptr_t.");
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a masked gather operation in LLVM IR. If not
- /// supported on the underlying platform, emulate it with loads
- /// @param vSrc - SIMD wide value that will be loaded if mask is invalid
- /// @param pBase - Int8* base VB address pointer value
- /// @param vIndices - SIMD wide value of VB byte offsets
- /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
- /// @param scale - value to scale indices by
- Value* BuilderGfxMem::GATHERPS(Value* vSrc,
- Value* pBase,
- Value* vIndices,
- Value* vMask,
- uint8_t scale,
- MEM_CLIENT usage)
- {
- // address may be coming in as 64bit int now so get the pointer
- if (pBase->getType() == mInt64Ty)
- {
- pBase = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
- }
-
- Value* vGather = Builder::GATHERPS(vSrc, pBase, vIndices, vMask, scale);
- return vGather;
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a masked gather operation in LLVM IR. If not
- /// supported on the underlying platform, emulate it with loads
- /// @param vSrc - SIMD wide value that will be loaded if mask is invalid
- /// @param pBase - Int8* base VB address pointer value
- /// @param vIndices - SIMD wide value of VB byte offsets
- /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
- /// @param scale - value to scale indices by
- Value* BuilderGfxMem::GATHERDD(Value* vSrc,
- Value* pBase,
- Value* vIndices,
- Value* vMask,
- uint8_t scale,
- MEM_CLIENT usage)
- {
-
- // address may be coming in as 64bit int now so get the pointer
- if (pBase->getType() == mInt64Ty)
- {
- pBase = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
- }
-
- Value* vGather = Builder::GATHERDD(vSrc, pBase, vIndices, vMask, scale);
- return vGather;
- }
-
- void BuilderGfxMem::SCATTERPS(
- Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
- {
-
- // address may be coming in as 64bit int now so get the pointer
- if (pDst->getType() == mInt64Ty)
- {
- pDst = INT_TO_PTR(pDst, PointerType::get(mInt8Ty, 0));
- }
-
- Builder::SCATTERPS(pDst, BITCAST(vSrc, mSimdFP32Ty), vOffsets, vMask, usage);
- }
-
- Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
- {
- return ADD(base, offset);
- }
-
- Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, bool isReadOnly, const Twine& Name)
- {
- bool xlate = (Ptr->getType() == mInt64Ty);
- if (xlate)
- {
- Ptr = INT_TO_PTR(Ptr, Ty);
- Ptr = Builder::GEP(Ptr, Idx, nullptr, isReadOnly, Name);
- Ptr = PTR_TO_INT(Ptr, mInt64Ty);
- if (isReadOnly)
- {
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- }
- else
- {
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForWrite);
- }
- }
- else
- {
- Ptr = Builder::GEP(Ptr, Idx, nullptr, isReadOnly, Name);
- }
- return Ptr;
- }
-
- Value* BuilderGfxMem::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name)
- {
- bool xlate = (Ptr->getType() == mInt64Ty);
- if (xlate)
- {
- Ptr = INT_TO_PTR(Ptr, Ty);
- Ptr = Builder::GEP(Ty, Ptr, Idx, Name);
- Ptr = PTR_TO_INT(Ptr, mInt64Ty);
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- }
- else
- {
- Ptr = Builder::GEP(Ty, Ptr, Idx, Name);
- }
- return Ptr;
- }
-
- Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty)
- {
- bool xlate = (Ptr->getType() == mInt64Ty);
- if (xlate)
- {
- Ptr = INT_TO_PTR(Ptr, Ty);
- Ptr = Builder::GEP(Ptr, indexList);
- Ptr = PTR_TO_INT(Ptr, mInt64Ty);
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- }
- else
- {
- Ptr = Builder::GEP(Ptr, indexList);
- }
- return Ptr;
- }
-
- Value*
- BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty)
- {
- bool xlate = (Ptr->getType() == mInt64Ty);
- if (xlate)
- {
- Ptr = INT_TO_PTR(Ptr, Ty);
- Ptr = Builder::GEP(Ptr, indexList);
- Ptr = PTR_TO_INT(Ptr, mInt64Ty);
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- }
- else
- {
- Ptr = Builder::GEP(Ptr, indexList);
- }
- return Ptr;
- }
-
- Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty, Value* pfnTranslateGfxAddress)
- {
- SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr),
- "Access of GFX pointers must have non-null type specified.");
-
- // address may be coming in as 64bit int now so get the pointer
- if (Ptr->getType() == mInt64Ty)
- {
- Ptr = INT_TO_PTR(Ptr, Ty);
- }
-
- return Ptr;
- }
-
- void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead)
- {
-#if defined(KNOB_ENABLE_AR)
- if (!KNOB_AR_ENABLE_MEMORY_EVENTS)
- {
- return;
- }
-
- Value* tmpPtr;
- // convert actual pointers to int64.
- uint32_t size = 0;
-
- if (Ptr->getType() == mInt64Ty)
- {
- DataLayout dataLayout(JM()->mpCurrentModule);
- size = (uint32_t)dataLayout.getTypeAllocSize(Ty);
-
- tmpPtr = Ptr;
- }
- else
- {
- DataLayout dataLayout(JM()->mpCurrentModule);
- size = (uint32_t)dataLayout.getTypeAllocSize(Ptr->getType());
-
- tmpPtr = PTR_TO_INT(Ptr, mInt64Ty);
- }
-
- // There are some shader compile setups where no translation functions are set up.
- // This would be a situation where the accesses are to internal rasterizer memory and won't
- // be logged.
- // TODO: we may wish to revisit this for URB reads/writes, though.
- if (mpfnTrackMemAccess)
- {
- SWR_ASSERT(mpWorkerData != nullptr);
- CALL(mpfnTrackMemAccess,
- {mpParamSimDC,
- mpWorkerData,
- tmpPtr,
- C((uint32_t)size),
- C((uint8_t)isRead),
- C((uint32_t)usage)});
- }
-#endif
-
- return;
- }
-
- LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage)
- {
- AssertGFXMemoryParams(Ptr, usage);
- TrackerHelper(Ptr, Ty, usage, true);
-
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- return Builder::LOAD(Ptr, Name);
- }
-
- LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage)
- {
- AssertGFXMemoryParams(Ptr, usage);
- TrackerHelper(Ptr, Ty, usage, true);
-
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- return Builder::LOAD(Ptr, Name);
- }
-
- LoadInst* BuilderGfxMem::LOAD(
- Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage)
- {
- AssertGFXMemoryParams(Ptr, usage);
- TrackerHelper(Ptr, Ty, usage, true);
-
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- return Builder::LOAD(Ptr, isVolatile, Name);
- }
-
- LoadInst* BuilderGfxMem::LOAD(Value* BasePtr,
- const std::initializer_list<uint32_t>& offset,
- const llvm::Twine& name,
- Type* Ty,
- MEM_CLIENT usage)
- {
- AssertGFXMemoryParams(BasePtr, usage);
-
- bool bNeedTranslation = false;
- if (BasePtr->getType() == mInt64Ty)
- {
- SWR_ASSERT(Ty);
- BasePtr = INT_TO_PTR(BasePtr, Ty, name);
- bNeedTranslation = true;
- }
- std::vector<Value*> valIndices;
- for (auto i : offset)
- {
- valIndices.push_back(C(i));
- }
- BasePtr = Builder::GEPA(BasePtr, valIndices, name);
- if (bNeedTranslation)
- {
- BasePtr = PTR_TO_INT(BasePtr, mInt64Ty, name);
- }
-
- return LOAD(BasePtr, name, Ty, usage);
- }
-
- CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr,
- unsigned Align,
- Value* Mask,
- Value* PassThru,
- const Twine& Name,
- Type* Ty,
- MEM_CLIENT usage)
- {
- AssertGFXMemoryParams(Ptr, usage);
- TrackerHelper(Ptr, Ty, usage, true);
-
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage);
- }
-
- StoreInst*
- BuilderGfxMem::STORE(Value* Val, Value* Ptr, bool isVolatile, Type* Ty, MEM_CLIENT usage)
- {
- AssertGFXMemoryParams(Ptr, usage);
- TrackerHelper(Ptr, Ty, usage, false);
-
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- return Builder::STORE(Val, Ptr, isVolatile, Ty, usage);
- }
-
- StoreInst* BuilderGfxMem::STORE(Value* Val,
- Value* BasePtr,
- const std::initializer_list<uint32_t>& offset,
- Type* Ty,
- MEM_CLIENT usage)
- {
- AssertGFXMemoryParams(BasePtr, usage);
- TrackerHelper(BasePtr, Ty, usage, false);
-
- BasePtr = TranslationHelper(BasePtr, Ty, mpfnTranslateGfxAddressForRead);
- return Builder::STORE(Val, BasePtr, offset, Ty, usage);
- }
-
- CallInst* BuilderGfxMem::MASKED_STORE(
- Value* Val, Value* Ptr, unsigned Align, Value* Mask, Type* Ty, MEM_CLIENT usage)
- {
- AssertGFXMemoryParams(Ptr, usage);
-
- TrackerHelper(Ptr, Ty, usage, false);
-
- Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead);
- return Builder::MASKED_STORE(Val, Ptr, Align, Mask, Ty, usage);
- }
-
- Value* BuilderGfxMem::TranslateGfxAddressForRead(Value* xpGfxAddress,
- Type* PtrTy,
- const Twine& Name,
- MEM_CLIENT /* usage */)
- {
- if (PtrTy == nullptr)
- {
- PtrTy = mInt8PtrTy;
- }
- return INT_TO_PTR(xpGfxAddress, PtrTy, Name);
- }
-
- Value* BuilderGfxMem::TranslateGfxAddressForWrite(Value* xpGfxAddress,
- Type* PtrTy,
- const Twine& Name,
- MEM_CLIENT /* usage */)
- {
- if (PtrTy == nullptr)
- {
- PtrTy = mInt8PtrTy;
- }
- return INT_TO_PTR(xpGfxAddress, PtrTy, Name);
- }
-
-} // namespace SwrJit
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
deleted file mode 100644
index c361959b76f..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file builder_gfx_mem.h
- *
- * @brief Definition of the builder to support different translation types for gfx memory access
- *
- * Notes:
- *
- ******************************************************************************/
-#pragma once
-
-#include "builder.h"
-
-namespace SwrJit
-{
- using namespace llvm;
-
- class BuilderGfxMem : public Builder
- {
- public:
- BuilderGfxMem(JitManager* pJitMgr);
- virtual ~BuilderGfxMem() {}
-
- virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, bool isReadOnly = true, const Twine& Name = "");
- virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = "");
- virtual Value*
- GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty = nullptr);
- virtual Value*
- GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty = nullptr);
-
- virtual LoadInst* LOAD(Value* Ptr,
- const char* Name,
- Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
- virtual LoadInst* LOAD(Value* Ptr,
- const Twine& Name = "",
- Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
- virtual LoadInst* LOAD(Value* Ptr,
- bool isVolatile,
- const Twine& Name = "",
- Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
- virtual LoadInst* LOAD(Value* BasePtr,
- const std::initializer_list<uint32_t>& offset,
- const llvm::Twine& Name = "",
- Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
- virtual CallInst* MASKED_LOAD(Value* Ptr,
- unsigned Align,
- Value* Mask,
- Value* PassThru = nullptr,
- const Twine& Name = "",
- Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
- virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
- virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
- virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
- virtual Value* GATHERPS(Value* src,
- Value* pBase,
- Value* indices,
- Value* mask,
- uint8_t scale = 1,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
- virtual Value* GATHERDD(Value* src,
- Value* pBase,
- Value* indices,
- Value* mask,
- uint8_t scale = 1,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
- virtual void SCATTERPS(Value* pDst,
- Value* vSrc,
- Value* vOffsets,
- Value* vMask,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
- Value* TranslateGfxAddressForRead(Value* xpGfxAddress,
- Type* PtrTy = nullptr,
- const Twine& Name = "",
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
- Value* TranslateGfxAddressForWrite(Value* xpGfxAddress,
- Type* PtrTy = nullptr,
- const Twine& Name = "",
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
- protected:
- void AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage);
-
- virtual void NotifyPrivateContextSet();
-
- virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
-
- Value* TranslationHelper(Value* Ptr, Type* Ty, Value* pfnTranslateGfxAddress);
- void TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead);
-
- FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; }
- Value* GetTranslationFunctionForRead() { return mpfnTranslateGfxAddressForRead; }
- Value* GetTranslationFunctionForWrite() { return mpfnTranslateGfxAddressForWrite; }
- Value* GetParamSimDC() { return mpParamSimDC; }
-
- Value* mpWorkerData;
-
- private:
- FunctionType* mpTranslationFuncTy;
- Value* mpfnTranslateGfxAddressForRead;
- Value* mpfnTranslateGfxAddressForWrite;
- Value* mpParamSimDC;
- Value* mpfnTrackMemAccess;
- };
-} // namespace SwrJit
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_math.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_math.h
deleted file mode 100644
index 02aa6f97cdf..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_math.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file builder_math.h
- *
- * @brief math/alu builder functions
- *
- * Notes:
- *
- ******************************************************************************/
-#pragma once
-
-Value* VLOG2PS(Value* src);
-Value* VPOW24PS(Value* src);
-Value* VEXP2PS(Value* src);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
deleted file mode 100644
index b5eb0a782b1..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ /dev/null
@@ -1,767 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file builder_misc.cpp
- *
- * @brief Implementation for miscellaneous builder functions
- *
- * Notes:
- *
- ******************************************************************************/
-#include "jit_pch.hpp"
-#include "builder.h"
-
-#include <cstdarg>
-
-namespace SwrJit
-{
- void Builder::AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage)
- {
- SWR_ASSERT(
- ptr->getType() != mInt64Ty,
- "Address appears to be GFX access. Requires translation through BuilderGfxMem.");
- }
-
- Value* Builder::GEP(Value* Ptr, Value* Idx, Type* Ty, bool isReadOnly, const Twine& Name)
- {
- return IRB()->CreateGEP(Ptr, Idx, Name);
- }
-
- Value* Builder::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name)
- {
- return IRB()->CreateGEP(Ty, Ptr, Idx, Name);
- }
-
- Value* Builder::GEP(Value* ptr, const std::initializer_list<Value*>& indexList, Type* Ty)
- {
- std::vector<Value*> indices;
- for (auto i : indexList)
- indices.push_back(i);
- return GEPA(ptr, indices);
- }
-
- Value* Builder::GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty)
- {
- std::vector<Value*> indices;
- for (auto i : indexList)
- indices.push_back(C(i));
- return GEPA(ptr, indices);
- }
-
- Value* Builder::GEPA(Value* Ptr, ArrayRef<Value*> IdxList, const Twine& Name)
- {
- return IRB()->CreateGEP(Ptr, IdxList, Name);
- }
-
- Value* Builder::GEPA(Type* Ty, Value* Ptr, ArrayRef<Value*> IdxList, const Twine& Name)
- {
- return IRB()->CreateGEP(Ty, Ptr, IdxList, Name);
- }
-
- Value* Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*>& indexList)
- {
- std::vector<Value*> indices;
- for (auto i : indexList)
- indices.push_back(i);
- return IN_BOUNDS_GEP(ptr, indices);
- }
-
- Value* Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList)
- {
- std::vector<Value*> indices;
- for (auto i : indexList)
- indices.push_back(C(i));
- return IN_BOUNDS_GEP(ptr, indices);
- }
-
- LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, MEM_CLIENT usage)
- {
- AssertMemoryUsageParams(Ptr, usage);
- return IRB()->CreateLoad(Ptr, Name);
- }
-
- LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, MEM_CLIENT usage)
- {
- AssertMemoryUsageParams(Ptr, usage);
- return IRB()->CreateLoad(Ptr, Name);
- }
-
- LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, MEM_CLIENT usage)
- {
- AssertMemoryUsageParams(Ptr, usage);
- return IRB()->CreateLoad(Ty, Ptr, Name);
- }
-
- LoadInst*
- Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, MEM_CLIENT usage)
- {
- AssertMemoryUsageParams(Ptr, usage);
- return IRB()->CreateLoad(Ptr, isVolatile, Name);
- }
-
- LoadInst* Builder::LOAD(Value* basePtr,
- const std::initializer_list<uint32_t>& indices,
- const llvm::Twine& name,
- Type* Ty,
- MEM_CLIENT usage)
- {
- std::vector<Value*> valIndices;
- for (auto i : indices)
- valIndices.push_back(C(i));
- return Builder::LOAD(GEPA(basePtr, valIndices), name);
- }
-
- LoadInst* Builder::LOADV(Value* basePtr,
- const std::initializer_list<Value*>& indices,
- const llvm::Twine& name)
- {
- std::vector<Value*> valIndices;
- for (auto i : indices)
- valIndices.push_back(i);
- return LOAD(GEPA(basePtr, valIndices), name);
- }
-
- StoreInst*
- Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, MEM_CLIENT usage)
- {
- std::vector<Value*> valIndices;
- for (auto i : indices)
- valIndices.push_back(C(i));
- return STORE(val, GEPA(basePtr, valIndices));
- }
-
- StoreInst*
- Builder::STOREV(Value* val, Value* basePtr, const std::initializer_list<Value*>& indices)
- {
- std::vector<Value*> valIndices;
- for (auto i : indices)
- valIndices.push_back(i);
- return STORE(val, GEPA(basePtr, valIndices));
- }
-
- Value* Builder::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
- {
- return GEP(base, offset);
- }
-
- Value* Builder::MEM_ADD(Value* i32Incr,
- Value* basePtr,
- const std::initializer_list<uint32_t>& indices,
- const llvm::Twine& name)
- {
- Value* i32Value = LOAD(GEP(basePtr, indices), name);
- Value* i32Result = ADD(i32Value, i32Incr);
- return STORE(i32Result, GEP(basePtr, indices));
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a masked gather operation in LLVM IR. If not
- /// supported on the underlying platform, emulate it with loads
- /// @param vSrc - SIMD wide value that will be loaded if mask is invalid
- /// @param pBase - Int8* base VB address pointer value
- /// @param vIndices - SIMD wide value of VB byte offsets
- /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
- /// @param scale - value to scale indices by
- Value* Builder::GATHERPS(Value* vSrc,
- Value* pBase,
- Value* vIndices,
- Value* vMask,
- uint8_t scale,
- MEM_CLIENT usage)
- {
- AssertMemoryUsageParams(pBase, usage);
-
- return VGATHERPS(vSrc, pBase, vIndices, vMask, C(scale));
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a masked gather operation in LLVM IR. If not
- /// supported on the underlying platform, emulate it with loads
- /// @param vSrc - SIMD wide value that will be loaded if mask is invalid
- /// @param pBase - Int8* base VB address pointer value
- /// @param vIndices - SIMD wide value of VB byte offsets
- /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
- /// @param scale - value to scale indices by
- Value* Builder::GATHERDD(Value* vSrc,
- Value* pBase,
- Value* vIndices,
- Value* vMask,
- uint8_t scale,
- MEM_CLIENT usage)
- {
- AssertMemoryUsageParams(pBase, usage);
-
- return VGATHERDD(vSrc, pBase, vIndices, vMask, C(scale));
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a masked gather operation in LLVM IR. If not
- /// supported on the underlying platform, emulate it with loads
- /// @param vSrc - SIMD wide value that will be loaded if mask is invalid
- /// @param pBase - Int8* base VB address pointer value
- /// @param vIndices - SIMD wide value of VB byte offsets
- /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
- /// @param scale - value to scale indices by
- Value*
- Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale)
- {
- return VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale));
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Alternative masked gather where source is a vector of pointers
- /// @param pVecSrcPtr - SIMD wide vector of pointers
- /// @param pVecMask - SIMD active lanes
- /// @param pVecPassthru - SIMD wide vector of values to load when lane is inactive
- Value* Builder::GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru)
- {
- return MASKED_GATHER(pVecSrcPtr, AlignType(4), pVecMask, pVecPassthru);
- }
-
- void Builder::SCATTER_PTR(Value* pVecDstPtr, Value* pVecSrc, Value* pVecMask)
- {
- MASKED_SCATTER(pVecSrc, pVecDstPtr, AlignType(4), pVecMask);
- }
-
- void Builder::Gather4(const SWR_FORMAT format,
- Value* pSrcBase,
- Value* byteOffsets,
- Value* mask,
- Value* vGatherComponents[],
- bool bPackedOutput,
- MEM_CLIENT usage)
- {
- const SWR_FORMAT_INFO& info = GetFormatInfo(format);
- if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
- {
- GATHER4PS(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput, usage);
- }
- else
- {
- GATHER4DD(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput, usage);
- }
- }
-
- void Builder::GATHER4PS(const SWR_FORMAT_INFO& info,
- Value* pSrcBase,
- Value* byteOffsets,
- Value* vMask,
- Value* vGatherComponents[],
- bool bPackedOutput,
- MEM_CLIENT usage)
- {
- switch (info.bpp / info.numComps)
- {
- case 16:
- {
- Value* vGatherResult[2];
-
- // TODO: vGatherMaskedVal
- Value* vGatherMaskedVal = VIMMED1((float)0);
-
- // always have at least one component out of x or y to fetch
-
- vGatherResult[0] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
- // e.g. result of first 8x32bit integer gather for 16bit components
- // 256i - 0 1 2 3 4 5 6 7
- // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
- //
-
- // if we have at least one component out of x or y to fetch
- if (info.numComps > 2)
- {
- // offset base to the next components(zw) in the vertex to gather
- pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
-
- vGatherResult[1] =
- GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
- // e.g. result of second 8x32bit integer gather for 16bit components
- // 256i - 0 1 2 3 4 5 6 7
- // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
- //
- }
- else
- {
- vGatherResult[1] = vGatherMaskedVal;
- }
-
- // Shuffle gathered components into place, each row is a component
- Shuffle16bpcGather4(info, vGatherResult, vGatherComponents, bPackedOutput);
- }
- break;
- case 32:
- {
- // apply defaults
- for (uint32_t i = 0; i < 4; ++i)
- {
- vGatherComponents[i] = VIMMED1(*(float*)&info.defaults[i]);
- }
-
- for (uint32_t i = 0; i < info.numComps; i++)
- {
- uint32_t swizzleIndex = info.swizzle[i];
-
- // Gather a SIMD of components
- vGatherComponents[swizzleIndex] = GATHERPS(
- vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
-
- // offset base to the next component to gather
- pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
- }
- }
- break;
- default:
- SWR_INVALID("Invalid float format");
- break;
- }
- }
-
- void Builder::GATHER4DD(const SWR_FORMAT_INFO& info,
- Value* pSrcBase,
- Value* byteOffsets,
- Value* vMask,
- Value* vGatherComponents[],
- bool bPackedOutput,
- MEM_CLIENT usage)
- {
- switch (info.bpp / info.numComps)
- {
- case 8:
- {
- Value* vGatherMaskedVal = VIMMED1((int32_t)0);
- Value* vGatherResult =
- GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
- // e.g. result of an 8x32bit integer gather for 8bit components
- // 256i - 0 1 2 3 4 5 6 7
- // xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
-
- Shuffle8bpcGather4(info, vGatherResult, vGatherComponents, bPackedOutput);
- }
- break;
- case 16:
- {
- Value* vGatherResult[2];
-
- // TODO: vGatherMaskedVal
- Value* vGatherMaskedVal = VIMMED1((int32_t)0);
-
- // always have at least one component out of x or y to fetch
-
- vGatherResult[0] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
- // e.g. result of first 8x32bit integer gather for 16bit components
- // 256i - 0 1 2 3 4 5 6 7
- // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
- //
-
- // if we have at least one component out of x or y to fetch
- if (info.numComps > 2)
- {
- // offset base to the next components(zw) in the vertex to gather
- pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
-
- vGatherResult[1] =
- GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
- // e.g. result of second 8x32bit integer gather for 16bit components
- // 256i - 0 1 2 3 4 5 6 7
- // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
- //
- }
- else
- {
- vGatherResult[1] = vGatherMaskedVal;
- }
-
- // Shuffle gathered components into place, each row is a component
- Shuffle16bpcGather4(info, vGatherResult, vGatherComponents, bPackedOutput);
- }
- break;
- case 32:
- {
- // apply defaults
- for (uint32_t i = 0; i < 4; ++i)
- {
- vGatherComponents[i] = VIMMED1((int)info.defaults[i]);
- }
-
- for (uint32_t i = 0; i < info.numComps; i++)
- {
- uint32_t swizzleIndex = info.swizzle[i];
-
- // Gather a SIMD of components
- vGatherComponents[swizzleIndex] = GATHERDD(
- vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
-
- // offset base to the next component to gather
- pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
- }
- }
- break;
- default:
- SWR_INVALID("unsupported format");
- break;
- }
- }
-
- void Builder::Shuffle16bpcGather4(const SWR_FORMAT_INFO& info,
- Value* vGatherInput[2],
- Value* vGatherOutput[4],
- bool bPackedOutput)
- {
- // cast types
- Type* vGatherTy = getVectorType(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
- Type* v32x8Ty = getVectorType(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
-
- // input could either be float or int vector; do shuffle work in int
- vGatherInput[0] = BITCAST(vGatherInput[0], mSimdInt32Ty);
- vGatherInput[1] = BITCAST(vGatherInput[1], mSimdInt32Ty);
-
- if (bPackedOutput)
- {
- Type* v128bitTy = getVectorType(IntegerType::getIntNTy(JM()->mContext, 128),
- mVWidth / 4); // vwidth is units of 32 bits
-
- // shuffle mask
- Value* vConstMask = C<char>({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
- 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15});
- Value* vShufResult =
- BITCAST(PSHUFB(BITCAST(vGatherInput[0], v32x8Ty), vConstMask), vGatherTy);
- // after pshufb: group components together in each 128bit lane
- // 256i - 0 1 2 3 4 5 6 7
- // xxxx xxxx yyyy yyyy xxxx xxxx yyyy yyyy
-
- Value* vi128XY =
- BITCAST(VPERMD(vShufResult, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})), v128bitTy);
- // after PERMD: move and pack xy components into each 128bit lane
- // 256i - 0 1 2 3 4 5 6 7
- // xxxx xxxx xxxx xxxx yyyy yyyy yyyy yyyy
-
- // do the same for zw components
- Value* vi128ZW = nullptr;
- if (info.numComps > 2)
- {
- Value* vShufResult =
- BITCAST(PSHUFB(BITCAST(vGatherInput[1], v32x8Ty), vConstMask), vGatherTy);
- vi128ZW =
- BITCAST(VPERMD(vShufResult, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})), v128bitTy);
- }
-
- for (uint32_t i = 0; i < 4; i++)
- {
- uint32_t swizzleIndex = info.swizzle[i];
- // todo: fixed for packed
- Value* vGatherMaskedVal = VIMMED1((int32_t)(info.defaults[i]));
- if (i >= info.numComps)
- {
- // set the default component val
- vGatherOutput[swizzleIndex] = vGatherMaskedVal;
- continue;
- }
-
- // if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
- uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
- // if x or y, use vi128XY permute result, else use vi128ZW
- Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
-
- // extract packed component 128 bit lanes
- vGatherOutput[swizzleIndex] = VEXTRACT(selectedPermute, C(lane));
- }
- }
- else
- {
- // pshufb masks for each component
- Value* vConstMask[2];
- // x/z shuffle mask
- vConstMask[0] = C<char>({
- 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
- 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
- });
-
- // y/w shuffle mask
- vConstMask[1] = C<char>({2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
- 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1});
-
- // shuffle enabled components into lower word of each 32bit lane, 0 extending to 32 bits
- // apply defaults
- for (uint32_t i = 0; i < 4; ++i)
- {
- vGatherOutput[i] = VIMMED1((int32_t)info.defaults[i]);
- }
-
- for (uint32_t i = 0; i < info.numComps; i++)
- {
- uint32_t swizzleIndex = info.swizzle[i];
-
- // select correct constMask for x/z or y/w pshufb
- uint32_t selectedMask = ((i == 0) || (i == 2)) ? 0 : 1;
- // if x or y, use vi128XY permute result, else use vi128ZW
- uint32_t selectedGather = (i < 2) ? 0 : 1;
-
- vGatherOutput[swizzleIndex] =
- BITCAST(PSHUFB(BITCAST(vGatherInput[selectedGather], v32x8Ty),
- vConstMask[selectedMask]),
- vGatherTy);
- // after pshufb mask for x channel; z uses the same shuffle from the second gather
- // 256i - 0 1 2 3 4 5 6 7
- // xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
- }
- }
- }
-
- void Builder::Shuffle8bpcGather4(const SWR_FORMAT_INFO& info,
- Value* vGatherInput,
- Value* vGatherOutput[],
- bool bPackedOutput)
- {
- // cast types
- Type* vGatherTy = getVectorType(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
- Type* v32x8Ty = getVectorType(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
-
- if (bPackedOutput)
- {
- Type* v128Ty = getVectorType(IntegerType::getIntNTy(JM()->mContext, 128),
- mVWidth / 4); // vwidth is units of 32 bits
- // shuffle mask
- Value* vConstMask = C<char>({0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
- 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15});
- Value* vShufResult =
- BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
- // after pshufb: group components together in each 128bit lane
- // 256i - 0 1 2 3 4 5 6 7
- // xxxx yyyy zzzz wwww xxxx yyyy zzzz wwww
-
- Value* vi128XY =
- BITCAST(VPERMD(vShufResult, C<int32_t>({0, 4, 0, 0, 1, 5, 0, 0})), v128Ty);
- // after PERMD: move and pack xy and zw components in low 64 bits of each 128bit lane
- // 256i - 0 1 2 3 4 5 6 7
- // xxxx xxxx dcdc dcdc yyyy yyyy dcdc dcdc (dc - don't care)
-
- // do the same for zw components
- Value* vi128ZW = nullptr;
- if (info.numComps > 2)
- {
- vi128ZW =
- BITCAST(VPERMD(vShufResult, C<int32_t>({2, 6, 0, 0, 3, 7, 0, 0})), v128Ty);
- }
-
- // sign extend all enabled components. If we have a fill vVertexElements, output to
- // current simdvertex
- for (uint32_t i = 0; i < 4; i++)
- {
- uint32_t swizzleIndex = info.swizzle[i];
- // todo: fix for packed
- Value* vGatherMaskedVal = VIMMED1((int32_t)(info.defaults[i]));
- if (i >= info.numComps)
- {
- // set the default component val
- vGatherOutput[swizzleIndex] = vGatherMaskedVal;
- continue;
- }
-
- // if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
- uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
- // if x or y, use vi128XY permute result, else use vi128ZW
- Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
-
- // sign extend
- vGatherOutput[swizzleIndex] = VEXTRACT(selectedPermute, C(lane));
- }
- }
- // else zero extend
- else
- {
- // shuffle enabled components into lower byte of each 32bit lane, 0 extending to 32 bits
- // apply defaults
- for (uint32_t i = 0; i < 4; ++i)
- {
- vGatherOutput[i] = VIMMED1((int32_t)info.defaults[i]);
- }
-
- for (uint32_t i = 0; i < info.numComps; i++)
- {
- uint32_t swizzleIndex = info.swizzle[i];
-
- // pshufb masks for each component
- Value* vConstMask;
- switch (i)
- {
- case 0:
- // x shuffle mask
- vConstMask =
- C<char>({0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
- 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1});
- break;
- case 1:
- // y shuffle mask
- vConstMask =
- C<char>({1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
- 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1});
- break;
- case 2:
- // z shuffle mask
- vConstMask =
- C<char>({2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
- 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1});
- break;
- case 3:
- // w shuffle mask
- vConstMask =
- C<char>({3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
- 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1});
- break;
- default:
- vConstMask = nullptr;
- break;
- }
-
- assert(vConstMask && "Invalid info.numComps value");
- vGatherOutput[swizzleIndex] =
- BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
- // after pshufb for x channel
- // 256i - 0 1 2 3 4 5 6 7
- // x000 x000 x000 x000 x000 x000 x000 x000
- }
- }
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief emulates a scatter operation.
- /// @param pDst - pointer to destination
- /// @param vSrc - vector of src data to scatter
- /// @param vOffsets - vector of byte offsets from pDst
- /// @param vMask - mask of valid lanes
- void Builder::SCATTERPS(
- Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage)
- {
- AssertMemoryUsageParams(pDst, usage);
-#if LLVM_VERSION_MAJOR >= 11
- SWR_ASSERT(cast<VectorType>(vSrc->getType())->getElementType()->isFloatTy());
-#else
- SWR_ASSERT(vSrc->getType()->getVectorElementType()->isFloatTy());
-#endif
- VSCATTERPS(pDst, vMask, vOffsets, vSrc, C(1));
- return;
-
- /* Scatter algorithm
-
- while(Index = BitScanForward(mask))
- srcElem = srcVector[Index]
- offsetElem = offsetVector[Index]
- *(pDst + offsetElem) = srcElem
- Update mask (&= ~(1<<Index)
-
- */
-
- /*
-
- // Reference implementation kept around for reference
-
- BasicBlock* pCurBB = IRB()->GetInsertBlock();
- Function* pFunc = pCurBB->getParent();
- Type* pSrcTy = vSrc->getType()->getVectorElementType();
-
- // Store vectors on stack
- if (pScatterStackSrc == nullptr)
- {
- // Save off stack allocations and reuse per scatter. Significantly reduces stack
- // requirements for shaders with a lot of scatters.
- pScatterStackSrc = CreateEntryAlloca(pFunc, mSimdInt64Ty);
- pScatterStackOffsets = CreateEntryAlloca(pFunc, mSimdInt32Ty);
- }
-
- Value* pSrcArrayPtr = BITCAST(pScatterStackSrc, PointerType::get(vSrc->getType(), 0));
- Value* pOffsetsArrayPtr = pScatterStackOffsets;
- STORE(vSrc, pSrcArrayPtr);
- STORE(vOffsets, pOffsetsArrayPtr);
-
- // Cast to pointers for random access
- pSrcArrayPtr = POINTER_CAST(pSrcArrayPtr, PointerType::get(pSrcTy, 0));
- pOffsetsArrayPtr = POINTER_CAST(pOffsetsArrayPtr, PointerType::get(mInt32Ty, 0));
-
- Value* pMask = VMOVMSK(vMask);
-
- // Setup loop basic block
- BasicBlock* pLoop = BasicBlock::Create(mpJitMgr->mContext, "Scatter_Loop", pFunc);
-
- // compute first set bit
- Value* pIndex = CTTZ(pMask, C(false));
-
- Value* pIsUndef = ICMP_EQ(pIndex, C(32));
-
- // Split current block or create new one if building inline
- BasicBlock* pPostLoop;
- if (pCurBB->getTerminator())
- {
- pPostLoop = pCurBB->splitBasicBlock(cast<Instruction>(pIsUndef)->getNextNode());
-
- // Remove unconditional jump created by splitBasicBlock
- pCurBB->getTerminator()->eraseFromParent();
-
- // Add terminator to end of original block
- IRB()->SetInsertPoint(pCurBB);
-
- // Add conditional branch
- COND_BR(pIsUndef, pPostLoop, pLoop);
- }
- else
- {
- pPostLoop = BasicBlock::Create(mpJitMgr->mContext, "PostScatter_Loop", pFunc);
-
- // Add conditional branch
- COND_BR(pIsUndef, pPostLoop, pLoop);
- }
-
- // Add loop basic block contents
- IRB()->SetInsertPoint(pLoop);
- PHINode* pIndexPhi = PHI(mInt32Ty, 2);
- PHINode* pMaskPhi = PHI(mInt32Ty, 2);
-
- pIndexPhi->addIncoming(pIndex, pCurBB);
- pMaskPhi->addIncoming(pMask, pCurBB);
-
- // Extract elements for this index
- Value* pSrcElem = LOADV(pSrcArrayPtr, {pIndexPhi});
- Value* pOffsetElem = LOADV(pOffsetsArrayPtr, {pIndexPhi});
-
- // GEP to this offset in dst
- Value* pCurDst = GEP(pDst, pOffsetElem, mInt8PtrTy);
- pCurDst = POINTER_CAST(pCurDst, PointerType::get(pSrcTy, 0));
- STORE(pSrcElem, pCurDst);
-
- // Update the mask
- Value* pNewMask = AND(pMaskPhi, NOT(SHL(C(1), pIndexPhi)));
-
- // Terminator
- Value* pNewIndex = CTTZ(pNewMask, C(false));
-
- pIsUndef = ICMP_EQ(pNewIndex, C(32));
- COND_BR(pIsUndef, pPostLoop, pLoop);
-
- // Update phi edges
- pIndexPhi->addIncoming(pNewIndex, pLoop);
- pMaskPhi->addIncoming(pNewMask, pLoop);
-
- // Move builder to beginning of post loop
- IRB()->SetInsertPoint(pPostLoop, pPostLoop->begin());
-
- */
- }
-} // namespace SwrJit
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
deleted file mode 100644
index 429d5779a4d..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file builder_misc.h
- *
- * @brief miscellaneous builder functions
- *
- * Notes:
- *
- ******************************************************************************/
-#pragma once
-
-public:
-enum class MEM_CLIENT
-{
- MEM_CLIENT_INTERNAL,
- GFX_MEM_CLIENT_FETCH,
- GFX_MEM_CLIENT_SAMPLER,
- GFX_MEM_CLIENT_SHADER,
- GFX_MEM_CLIENT_STREAMOUT,
- GFX_MEM_CLIENT_URB
-};
-
-protected:
-virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
-void AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage);
-
-public:
-virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, bool isReadOnly = true, const Twine& Name = "");
-virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = "");
-virtual Value* GEP(Value* ptr, const std::initializer_list<Value*>& indexList, Type* Ty = nullptr);
-virtual Value*
-GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty = nullptr);
-
-Value* GEPA(Value* Ptr, ArrayRef<Value*> IdxList, const Twine& Name = "");
-Value* GEPA(Type* Ty, Value* Ptr, ArrayRef<Value*> IdxList, const Twine& Name = "");
-
-Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*>& indexList);
-Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList);
-
-virtual LoadInst*
- LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-virtual LoadInst* LOAD(Value* Ptr,
- const Twine& Name = "",
- Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-virtual LoadInst*
- LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-virtual LoadInst* LOAD(Value* Ptr,
- bool isVolatile,
- const Twine& Name = "",
- Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-virtual LoadInst* LOAD(Value* BasePtr,
- const std::initializer_list<uint32_t>& offset,
- const llvm::Twine& Name = "",
- Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
-virtual CallInst* MASKED_LOAD(Value* Ptr,
- unsigned Align,
- Value* Mask,
- Value* PassThru = nullptr,
- const Twine& Name = "",
- Type* Ty = nullptr,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
-{
- return IRB()->CreateMaskedLoad(Ptr, AlignType(Align), Mask, PassThru, Name);
-}
-
-virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
-{
- return IRB()->CreateStore(Val, Ptr, isVolatile);
-}
-
-virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
-virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL)
-{
- return IRB()->CreateMaskedStore(Val, Ptr, AlignType(Align), Mask);
-}
-
-LoadInst* LOADV(Value* BasePtr, const std::initializer_list<Value*>& offset, const llvm::Twine& name = "");
-StoreInst* STOREV(Value* Val, Value* BasePtr, const std::initializer_list<Value*>& offset);
-
-Value* MEM_ADD(Value* i32Incr,
- Value* basePtr,
- const std::initializer_list<uint32_t>& indices,
- const llvm::Twine& name = "");
-
-void Gather4(const SWR_FORMAT format,
- Value* pSrcBase,
- Value* byteOffsets,
- Value* mask,
- Value* vGatherComponents[],
- bool bPackedOutput,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
-virtual Value* GATHERPS(Value* src,
- Value* pBase,
- Value* indices,
- Value* mask,
- uint8_t scale = 1,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
-void GATHER4PS(const SWR_FORMAT_INFO& info,
- Value* pSrcBase,
- Value* byteOffsets,
- Value* mask,
- Value* vGatherComponents[],
- bool bPackedOutput,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
-virtual Value* GATHERDD(Value* src,
- Value* pBase,
- Value* indices,
- Value* mask,
- uint8_t scale = 1,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
-void GATHER4DD(const SWR_FORMAT_INFO& info,
- Value* pSrcBase,
- Value* byteOffsets,
- Value* mask,
- Value* vGatherComponents[],
- bool bPackedOutput,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
-Value* GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
-
-Value* GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru);
-void SCATTER_PTR(Value* pVecDstPtr, Value* pVecSrc, Value* pVecMask);
-
-virtual void SCATTERPS(Value* pDst,
- Value* vSrc,
- Value* vOffsets,
- Value* vMask,
- MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL);
-
-void Shuffle8bpcGather4(const SWR_FORMAT_INFO& info,
- Value* vGatherInput,
- Value* vGatherOutput[],
- bool bPackedOutput);
-void Shuffle16bpcGather4(const SWR_FORMAT_INFO& info,
- Value* vGatherInput[],
- Value* vGatherOutput[],
- bool bPackedOutput);
-
-// Static stack allocations for scatter operations
-Value* pScatterStackSrc{nullptr};
-Value* pScatterStackOffsets{nullptr};
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
deleted file mode 100644
index 8080a40a1f9..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ /dev/null
@@ -1,1125 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file builder_misc.cpp
- *
- * @brief Implementation for miscellaneous builder functions
- *
- * Notes:
- *
- ******************************************************************************/
-#include "jit_pch.hpp"
-#include "builder.h"
-#include "common/rdtsc_buckets.h"
-
-#include <cstdarg>
-
-extern "C" void CallPrint(const char* fmt, ...);
-
-namespace SwrJit
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Convert an IEEE 754 32-bit single precision float to an
- /// 16 bit float with 5 exponent bits and a variable
- /// number of mantissa bits.
- /// @param val - 32-bit float
- /// @todo Maybe move this outside of this file into a header?
- static uint16_t ConvertFloat32ToFloat16(float val)
- {
- uint32_t sign, exp, mant;
- uint32_t roundBits;
-
- // Extract the sign, exponent, and mantissa
- uint32_t uf = *(uint32_t*)&val;
- sign = (uf & 0x80000000) >> 31;
- exp = (uf & 0x7F800000) >> 23;
- mant = uf & 0x007FFFFF;
-
- // Check for out of range
- if (std::isnan(val))
- {
- exp = 0x1F;
- mant = 0x200;
- sign = 1; // set the sign bit for NANs
- }
- else if (std::isinf(val))
- {
- exp = 0x1f;
- mant = 0x0;
- }
- else if (exp > (0x70 + 0x1E)) // Too big to represent -> max representable value
- {
- exp = 0x1E;
- mant = 0x3FF;
- }
- else if ((exp <= 0x70) && (exp >= 0x66)) // It's a denorm
- {
- mant |= 0x00800000;
- for (; exp <= 0x70; mant >>= 1, exp++)
- ;
- exp = 0;
- mant = mant >> 13;
- }
- else if (exp < 0x66) // Too small to represent -> Zero
- {
- exp = 0;
- mant = 0;
- }
- else
- {
- // Saves bits that will be shifted off for rounding
- roundBits = mant & 0x1FFFu;
- // convert exponent and mantissa to 16 bit format
- exp = exp - 0x70;
- mant = mant >> 13;
-
- // Essentially RTZ, but round up if off by only 1 lsb
- if (roundBits == 0x1FFFu)
- {
- mant++;
- // check for overflow
- if ((mant & 0xC00u) != 0)
- exp++;
- // make sure only the needed bits are used
- mant &= 0x3FF;
- }
- }
-
- uint32_t tmpVal = (sign << 15) | (exp << 10) | mant;
- return (uint16_t)tmpVal;
- }
-
- Constant* Builder::C(bool i) { return ConstantInt::get(IRB()->getInt1Ty(), (i ? 1 : 0)); }
-
- Constant* Builder::C(char i) { return ConstantInt::get(IRB()->getInt8Ty(), i); }
-
- Constant* Builder::C(uint8_t i) { return ConstantInt::get(IRB()->getInt8Ty(), i); }
-
- Constant* Builder::C(int i) { return ConstantInt::get(IRB()->getInt32Ty(), i); }
-
- Constant* Builder::C(int64_t i) { return ConstantInt::get(IRB()->getInt64Ty(), i); }
-
- Constant* Builder::C(uint16_t i) { return ConstantInt::get(mInt16Ty, i); }
-
- Constant* Builder::C(uint32_t i) { return ConstantInt::get(IRB()->getInt32Ty(), i); }
-
- Constant* Builder::C(uint64_t i) { return ConstantInt::get(IRB()->getInt64Ty(), i); }
-
- Constant* Builder::C(float i) { return ConstantFP::get(IRB()->getFloatTy(), i); }
-
- Constant* Builder::PRED(bool pred)
- {
- return ConstantInt::get(IRB()->getInt1Ty(), (pred ? 1 : 0));
- }
-
- Value* Builder::VIMMED1(uint64_t i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1_16(uint64_t i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1(int i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1_16(int i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1(uint32_t i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1_16(uint32_t i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1(float i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth, cast<ConstantFP>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantFP>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantFP>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1_16(float i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth16, cast<ConstantFP>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantFP>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantFP>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1(bool i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VIMMED1_16(bool i)
- {
-#if LLVM_VERSION_MAJOR <= 10
- return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
-#elif LLVM_VERSION_MAJOR == 11
- return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i)));
-#else
- return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i)));
-#endif
- }
-
- Value* Builder::VUNDEF_IPTR() { return UndefValue::get(getVectorType(mInt32PtrTy, mVWidth)); }
-
- Value* Builder::VUNDEF(Type* t) { return UndefValue::get(getVectorType(t, mVWidth)); }
-
- Value* Builder::VUNDEF_I() { return UndefValue::get(getVectorType(mInt32Ty, mVWidth)); }
-
- Value* Builder::VUNDEF_I_16() { return UndefValue::get(getVectorType(mInt32Ty, mVWidth16)); }
-
- Value* Builder::VUNDEF_F() { return UndefValue::get(getVectorType(mFP32Ty, mVWidth)); }
-
- Value* Builder::VUNDEF_F_16() { return UndefValue::get(getVectorType(mFP32Ty, mVWidth16)); }
-
- Value* Builder::VUNDEF(Type* ty, uint32_t size)
- {
- return UndefValue::get(getVectorType(ty, size));
- }
-
- Value* Builder::VBROADCAST(Value* src, const llvm::Twine& name)
- {
- // check if src is already a vector
- if (src->getType()->isVectorTy())
- {
- return src;
- }
-
- return VECTOR_SPLAT(mVWidth, src, name);
- }
-
- Value* Builder::VBROADCAST_16(Value* src)
- {
- // check if src is already a vector
- if (src->getType()->isVectorTy())
- {
- return src;
- }
-
- return VECTOR_SPLAT(mVWidth16, src);
- }
-
- uint32_t Builder::IMMED(Value* v)
- {
- SWR_ASSERT(isa<ConstantInt>(v));
- ConstantInt* pValConst = cast<ConstantInt>(v);
- return pValConst->getZExtValue();
- }
-
- int32_t Builder::S_IMMED(Value* v)
- {
- SWR_ASSERT(isa<ConstantInt>(v));
- ConstantInt* pValConst = cast<ConstantInt>(v);
- return pValConst->getSExtValue();
- }
-
- CallInst* Builder::CALL(Value* Callee,
- const std::initializer_list<Value*>& argsList,
- const llvm::Twine& name)
- {
- std::vector<Value*> args;
- for (auto arg : argsList)
- args.push_back(arg);
-#if LLVM_VERSION_MAJOR >= 11
- // see comment to CALLA(Callee) function in the header
- return CALLA(FunctionCallee(cast<Function>(Callee)), args, name);
-#else
- return CALLA(Callee, args, name);
-#endif
- }
-
- CallInst* Builder::CALL(Value* Callee, Value* arg)
- {
- std::vector<Value*> args;
- args.push_back(arg);
-#if LLVM_VERSION_MAJOR >= 11
- // see comment to CALLA(Callee) function in the header
- return CALLA(FunctionCallee(cast<Function>(Callee)), args);
-#else
- return CALLA(Callee, args);
-#endif
- }
-
- CallInst* Builder::CALL2(Value* Callee, Value* arg1, Value* arg2)
- {
- std::vector<Value*> args;
- args.push_back(arg1);
- args.push_back(arg2);
-#if LLVM_VERSION_MAJOR >= 11
- // see comment to CALLA(Callee) function in the header
- return CALLA(FunctionCallee(cast<Function>(Callee)), args);
-#else
- return CALLA(Callee, args);
-#endif
- }
-
- CallInst* Builder::CALL3(Value* Callee, Value* arg1, Value* arg2, Value* arg3)
- {
- std::vector<Value*> args;
- args.push_back(arg1);
- args.push_back(arg2);
- args.push_back(arg3);
-#if LLVM_VERSION_MAJOR >= 11
- // see comment to CALLA(Callee) function in the header
- return CALLA(FunctionCallee(cast<Function>(Callee)), args);
-#else
- return CALLA(Callee, args);
-#endif
- }
-
- Value* Builder::VRCP(Value* va, const llvm::Twine& name)
- {
- return FDIV(VIMMED1(1.0f), va, name); // 1 / a
- }
-
- Value* Builder::VPLANEPS(Value* vA, Value* vB, Value* vC, Value*& vX, Value*& vY)
- {
- Value* vOut = FMADDPS(vA, vX, vC);
- vOut = FMADDPS(vB, vY, vOut);
- return vOut;
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief insert a JIT call to CallPrint
- /// - outputs formatted string to both stdout and VS output window
- /// - DEBUG builds only
- /// Usage example:
- /// PRINT("index %d = 0x%p\n",{C(lane), pIndex});
- /// where C(lane) creates a constant value to print, and pIndex is the Value*
- /// result from a GEP, printing out the pointer to memory
- /// @param printStr - constant string to print, which includes format specifiers
- /// @param printArgs - initializer list of Value*'s to print to std out
- CallInst* Builder::PRINT(const std::string& printStr,
- const std::initializer_list<Value*>& printArgs)
- {
- // push the arguments to CallPrint into a vector
- std::vector<Value*> printCallArgs;
- // save room for the format string. we still need to modify it for vectors
- printCallArgs.resize(1);
-
- // search through the format string for special processing
- size_t pos = 0;
- std::string tempStr(printStr);
- pos = tempStr.find('%', pos);
- auto v = printArgs.begin();
-
- while ((pos != std::string::npos) && (v != printArgs.end()))
- {
- Value* pArg = *v;
- Type* pType = pArg->getType();
-
- if (pType->isVectorTy())
- {
- Type* pContainedType = pType->getContainedType(0);
-#if LLVM_VERSION_MAJOR >= 12
- FixedVectorType* pVectorType = cast<FixedVectorType>(pType);
-#elif LLVM_VERSION_MAJOR >= 11
- VectorType* pVectorType = cast<VectorType>(pType);
-#endif
- if (toupper(tempStr[pos + 1]) == 'X')
- {
- tempStr[pos] = '0';
- tempStr[pos + 1] = 'x';
- tempStr.insert(pos + 2, "%08X ");
- pos += 7;
-
- printCallArgs.push_back(VEXTRACT(pArg, C(0)));
-
- std::string vectorFormatStr;
-#if LLVM_VERSION_MAJOR >= 11
- for (uint32_t i = 1; i < pVectorType->getNumElements(); ++i)
-#else
- for (uint32_t i = 1; i < pType->getVectorNumElements(); ++i)
-#endif
- {
- vectorFormatStr += "0x%08X ";
- printCallArgs.push_back(VEXTRACT(pArg, C(i)));
- }
-
- tempStr.insert(pos, vectorFormatStr);
- pos += vectorFormatStr.size();
- }
- else if ((tempStr[pos + 1] == 'f') && (pContainedType->isFloatTy()))
- {
- uint32_t i = 0;
-#if LLVM_VERSION_MAJOR >= 11
- for (; i < pVectorType->getNumElements() - 1; i++)
-#else
- for (; i < pType->getVectorNumElements() - 1; i++)
-#endif
- {
- tempStr.insert(pos, std::string("%f "));
- pos += 3;
- printCallArgs.push_back(
- FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
- }
- printCallArgs.push_back(
- FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
- }
- else if ((tempStr[pos + 1] == 'd') && (pContainedType->isIntegerTy()))
- {
- uint32_t i = 0;
-#if LLVM_VERSION_MAJOR >= 11
- for (; i < pVectorType->getNumElements() - 1; i++)
-#else
- for (; i < pType->getVectorNumElements() - 1; i++)
-#endif
- {
- tempStr.insert(pos, std::string("%d "));
- pos += 3;
- printCallArgs.push_back(
- S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
- }
- printCallArgs.push_back(
- S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
- }
- else if ((tempStr[pos + 1] == 'u') && (pContainedType->isIntegerTy()))
- {
- uint32_t i = 0;
-#if LLVM_VERSION_MAJOR >= 11
- for (; i < pVectorType->getNumElements() - 1; i++)
-#else
- for (; i < pType->getVectorNumElements() - 1; i++)
-#endif
- {
- tempStr.insert(pos, std::string("%d "));
- pos += 3;
- printCallArgs.push_back(
- Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
- }
- printCallArgs.push_back(
- Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
- }
- }
- else
- {
- if (toupper(tempStr[pos + 1]) == 'X')
- {
- tempStr[pos] = '0';
- tempStr.insert(pos + 1, "x%08");
- printCallArgs.push_back(pArg);
- pos += 3;
- }
- // for %f we need to cast float Values to doubles so that they print out correctly
- else if ((tempStr[pos + 1] == 'f') && (pType->isFloatTy()))
- {
- printCallArgs.push_back(FP_EXT(pArg, Type::getDoubleTy(JM()->mContext)));
- pos++;
- }
- else
- {
- printCallArgs.push_back(pArg);
- }
- }
-
- // advance to the next argument
- v++;
- pos = tempStr.find('%', ++pos);
- }
-
- // create global variable constant string
- Constant* constString = ConstantDataArray::getString(JM()->mContext, tempStr, true);
- GlobalVariable* gvPtr = new GlobalVariable(
- constString->getType(), true, GlobalValue::InternalLinkage, constString, "printStr");
- JM()->mpCurrentModule->getGlobalList().push_back(gvPtr);
-
- // get a pointer to the first character in the constant string array
- std::vector<Constant*> geplist{C(0), C(0)};
- Constant* strGEP = ConstantExpr::getGetElementPtr(nullptr, gvPtr, geplist, false);
-
- // insert the pointer to the format string in the argument vector
- printCallArgs[0] = strGEP;
-
- // get pointer to CallPrint function and insert decl into the module if needed
- std::vector<Type*> args;
- args.push_back(PointerType::get(mInt8Ty, 0));
- FunctionType* callPrintTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, true);
- Function* callPrintFn =
-#if LLVM_VERSION_MAJOR >= 9
- cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy).getCallee());
-#else
- cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy));
-#endif
-
- // if we haven't yet added the symbol to the symbol table
- if ((sys::DynamicLibrary::SearchForAddressOfSymbol("CallPrint")) == nullptr)
- {
- sys::DynamicLibrary::AddSymbol("CallPrint", (void*)&CallPrint);
- }
-
- // insert a call to CallPrint
- return CALLA(callPrintFn, printCallArgs);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Wrapper around PRINT with initializer list.
- CallInst* Builder::PRINT(const std::string& printStr) { return PRINT(printStr, {}); }
-
- Value* Builder::EXTRACT_16(Value* x, uint32_t imm)
- {
- if (imm == 0)
- {
- return VSHUFFLE(x, UndefValue::get(x->getType()), {0, 1, 2, 3, 4, 5, 6, 7});
- }
- else
- {
- return VSHUFFLE(x, UndefValue::get(x->getType()), {8, 9, 10, 11, 12, 13, 14, 15});
- }
- }
-
- Value* Builder::JOIN_16(Value* a, Value* b)
- {
- return VSHUFFLE(a, b, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief convert x86 <N x float> mask to llvm <N x i1> mask
- Value* Builder::MASK(Value* vmask)
- {
- Value* src = BITCAST(vmask, mSimdInt32Ty);
- return ICMP_SLT(src, VIMMED1(0));
- }
-
- Value* Builder::MASK_16(Value* vmask)
- {
- Value* src = BITCAST(vmask, mSimd16Int32Ty);
- return ICMP_SLT(src, VIMMED1_16(0));
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief convert llvm <N x i1> mask to x86 <N x i32> mask
- Value* Builder::VMASK(Value* mask) { return S_EXT(mask, mSimdInt32Ty); }
-
- Value* Builder::VMASK_16(Value* mask) { return S_EXT(mask, mSimd16Int32Ty); }
-
- /// @brief Convert <Nxi1> llvm mask to integer
- Value* Builder::VMOVMSK(Value* mask)
- {
-#if LLVM_VERSION_MAJOR >= 11
-#if LLVM_VERSION_MAJOR >= 12
- FixedVectorType* pVectorType = cast<FixedVectorType>(mask->getType());
-#else
- VectorType* pVectorType = cast<VectorType>(mask->getType());
-#endif
- SWR_ASSERT(pVectorType->getElementType() == mInt1Ty);
- uint32_t numLanes = pVectorType->getNumElements();
-#else
- SWR_ASSERT(mask->getType()->getVectorElementType() == mInt1Ty);
- uint32_t numLanes = mask->getType()->getVectorNumElements();
-#endif
- Value* i32Result;
- if (numLanes == 8)
- {
- i32Result = BITCAST(mask, mInt8Ty);
- }
- else if (numLanes == 16)
- {
- i32Result = BITCAST(mask, mInt16Ty);
- }
- else
- {
- SWR_ASSERT("Unsupported vector width");
- i32Result = BITCAST(mask, mInt8Ty);
- }
- return Z_EXT(i32Result, mInt32Ty);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a VPSHUFB operation in LLVM IR. If not
- /// supported on the underlying platform, emulate it
- /// @param a - 256bit SIMD(32x8bit) of 8bit integer values
- /// @param b - 256bit SIMD(32x8bit) of 8bit integer mask values
- /// Byte masks in lower 128 lane of b selects 8 bit values from lower
- /// 128bits of a, and vice versa for the upper lanes. If the mask
- /// value is negative, '0' is inserted.
- Value* Builder::PSHUFB(Value* a, Value* b)
- {
- Value* res;
- // use avx2 pshufb instruction if available
- if (JM()->mArch.AVX2())
- {
- res = VPSHUFB(a, b);
- }
- else
- {
- Constant* cB = dyn_cast<Constant>(b);
- assert(cB != nullptr);
- // number of 8 bit elements in b
-#if LLVM_VERSION_MAJOR >= 12
- uint32_t numElms = cast<FixedVectorType>(cB->getType())->getNumElements();
-#else
- uint32_t numElms = cast<VectorType>(cB->getType())->getNumElements();
-#endif
- // output vector
- Value* vShuf = UndefValue::get(getVectorType(mInt8Ty, numElms));
-
- // insert an 8 bit value from the high and low lanes of a per loop iteration
- numElms /= 2;
- for (uint32_t i = 0; i < numElms; i++)
- {
- ConstantInt* cLow128b = cast<ConstantInt>(cB->getAggregateElement(i));
- ConstantInt* cHigh128b = cast<ConstantInt>(cB->getAggregateElement(i + numElms));
-
- // extract values from constant mask
- char valLow128bLane = (char)(cLow128b->getSExtValue());
- char valHigh128bLane = (char)(cHigh128b->getSExtValue());
-
- Value* insertValLow128b;
- Value* insertValHigh128b;
-
- // if the mask value is negative, insert a '0' in the respective output position
- // otherwise, lookup the value at mask position (bits 3..0 of the respective mask
- // byte) in a and insert in output vector
- insertValLow128b =
- (valLow128bLane < 0) ? C((char)0) : VEXTRACT(a, C((valLow128bLane & 0xF)));
- insertValHigh128b = (valHigh128bLane < 0)
- ? C((char)0)
- : VEXTRACT(a, C((valHigh128bLane & 0xF) + numElms));
-
- vShuf = VINSERT(vShuf, insertValLow128b, i);
- vShuf = VINSERT(vShuf, insertValHigh128b, (i + numElms));
- }
- res = vShuf;
- }
- return res;
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a VPSHUFB operation (sign extend 8 8bit values to 32
- /// bits)in LLVM IR. If not supported on the underlying platform, emulate it
- /// @param a - 128bit SIMD lane(16x8bit) of 8bit integer values. Only
- /// lower 8 values are used.
- Value* Builder::PMOVSXBD(Value* a)
- {
- // VPMOVSXBD output type
- Type* v8x32Ty = getVectorType(mInt32Ty, 8);
- // Extract 8 values from 128bit lane and sign extend
- return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a VPSHUFB operation (sign extend 8 16bit values to 32
- /// bits)in LLVM IR. If not supported on the underlying platform, emulate it
- /// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values.
- Value* Builder::PMOVSXWD(Value* a)
- {
- // VPMOVSXWD output type
- Type* v8x32Ty = getVectorType(mInt32Ty, 8);
- // Extract 8 values from 128bit lane and sign extend
- return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a VCVTPH2PS operation (float16->float32 conversion)
- /// in LLVM IR. If not supported on the underlying platform, emulate it
- /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format.
- Value* Builder::CVTPH2PS(Value* a, const llvm::Twine& name)
- {
- // Bitcast Nxint16 to Nxhalf
-#if LLVM_VERSION_MAJOR >= 12
- uint32_t numElems = cast<FixedVectorType>(a->getType())->getNumElements();
-#elif LLVM_VERSION_MAJOR >= 11
- uint32_t numElems = cast<VectorType>(a->getType())->getNumElements();
-#else
- uint32_t numElems = a->getType()->getVectorNumElements();
-#endif
- Value* input = BITCAST(a, getVectorType(mFP16Ty, numElems));
-
- return FP_EXT(input, getVectorType(mFP32Ty, numElems), name);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Generate a VCVTPS2PH operation (float32->float16 conversion)
- /// in LLVM IR. If not supported on the underlying platform, emulate it
- /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format.
- Value* Builder::CVTPS2PH(Value* a, Value* rounding)
- {
- if (JM()->mArch.F16C())
- {
- return VCVTPS2PH(a, rounding);
- }
- else
- {
- // call scalar C function for now
- FunctionType* pFuncTy = FunctionType::get(mInt16Ty, mFP32Ty);
- Function* pCvtPs2Ph = cast<Function>(
-#if LLVM_VERSION_MAJOR >= 9
- JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy).getCallee());
-#else
- JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy));
-#endif
-
- if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat32ToFloat16") == nullptr)
- {
- sys::DynamicLibrary::AddSymbol("ConvertFloat32ToFloat16",
- (void*)&ConvertFloat32ToFloat16);
- }
-
- Value* pResult = UndefValue::get(mSimdInt16Ty);
- for (uint32_t i = 0; i < mVWidth; ++i)
- {
- Value* pSrc = VEXTRACT(a, C(i));
- Value* pConv = CALL(pCvtPs2Ph, std::initializer_list<Value*>{pSrc});
- pResult = VINSERT(pResult, pConv, C(i));
- }
-
- return pResult;
- }
- }
-
- Value* Builder::PMAXSD(Value* a, Value* b)
- {
- Value* cmp = ICMP_SGT(a, b);
- return SELECT(cmp, a, b);
- }
-
- Value* Builder::PMINSD(Value* a, Value* b)
- {
- Value* cmp = ICMP_SLT(a, b);
- return SELECT(cmp, a, b);
- }
-
- Value* Builder::PMAXUD(Value* a, Value* b)
- {
- Value* cmp = ICMP_UGT(a, b);
- return SELECT(cmp, a, b);
- }
-
- Value* Builder::PMINUD(Value* a, Value* b)
- {
- Value* cmp = ICMP_ULT(a, b);
- return SELECT(cmp, a, b);
- }
-
- // Helper function to create alloca in entry block of function
- Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType)
- {
- auto saveIP = IRB()->saveIP();
- IRB()->SetInsertPoint(&pFunc->getEntryBlock(), pFunc->getEntryBlock().begin());
- Value* pAlloca = ALLOCA(pType);
- if (saveIP.isSet())
- IRB()->restoreIP(saveIP);
- return pAlloca;
- }
-
- Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType, Value* pArraySize)
- {
- auto saveIP = IRB()->saveIP();
- IRB()->SetInsertPoint(&pFunc->getEntryBlock(), pFunc->getEntryBlock().begin());
- Value* pAlloca = ALLOCA(pType, pArraySize);
- if (saveIP.isSet())
- IRB()->restoreIP(saveIP);
- return pAlloca;
- }
-
- Value* Builder::VABSPS(Value* a)
- {
- Value* asInt = BITCAST(a, mSimdInt32Ty);
- Value* result = BITCAST(AND(asInt, VIMMED1(0x7fffffff)), mSimdFP32Ty);
- return result;
- }
-
- Value* Builder::ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name)
- {
- Value* lowCmp = ICMP_SLT(src, low);
- Value* ret = SELECT(lowCmp, low, src);
-
- Value* highCmp = ICMP_SGT(ret, high);
- ret = SELECT(highCmp, high, ret, name);
-
- return ret;
- }
-
- Value* Builder::FCLAMP(Value* src, Value* low, Value* high)
- {
- Value* lowCmp = FCMP_OLT(src, low);
- Value* ret = SELECT(lowCmp, low, src);
-
- Value* highCmp = FCMP_OGT(ret, high);
- ret = SELECT(highCmp, high, ret);
-
- return ret;
- }
-
- Value* Builder::FCLAMP(Value* src, float low, float high)
- {
- Value* result = VMAXPS(src, VIMMED1(low));
- result = VMINPS(result, VIMMED1(high));
-
- return result;
- }
-
- Value* Builder::FMADDPS(Value* a, Value* b, Value* c)
- {
- Value* vOut;
- // This maps to LLVM fmuladd intrinsic
- vOut = VFMADDPS(a, b, c);
- return vOut;
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief pop count on vector mask (e.g. <8 x i1>)
- Value* Builder::VPOPCNT(Value* a) { return POPCNT(VMOVMSK(a)); }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Float / Fixed-point conversions
- //////////////////////////////////////////////////////////////////////////
- Value* Builder::VCVT_F32_FIXED_SI(Value* vFloat,
- uint32_t numIntBits,
- uint32_t numFracBits,
- const llvm::Twine& name)
- {
- SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
- Value* fixed = nullptr;
-
-#if 0 // This doesn't work for negative numbers!!
- {
- fixed = FP_TO_SI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
- C(_MM_FROUND_TO_NEAREST_INT)),
- mSimdInt32Ty);
- }
- else
-#endif
- {
- // Do round to nearest int on fractional bits first
- // Not entirely perfect for negative numbers, but close enough
- vFloat = VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
- C(_MM_FROUND_TO_NEAREST_INT));
- vFloat = FMUL(vFloat, VIMMED1(1.0f / float(1 << numFracBits)));
-
- // TODO: Handle INF, NAN, overflow / underflow, etc.
-
- Value* vSgn = FCMP_OLT(vFloat, VIMMED1(0.0f));
- Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty);
- Value* vFixed = AND(vFloatInt, VIMMED1((1 << 23) - 1));
- vFixed = OR(vFixed, VIMMED1(1 << 23));
- vFixed = SELECT(vSgn, NEG(vFixed), vFixed);
-
- Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24));
- vExp = SUB(vExp, VIMMED1(127));
-
- Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp);
-
- fixed = ASHR(vFixed, vExtraBits, name);
- }
-
- return fixed;
- }
-
- Value* Builder::VCVT_FIXED_SI_F32(Value* vFixed,
- uint32_t numIntBits,
- uint32_t numFracBits,
- const llvm::Twine& name)
- {
- SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
- uint32_t extraBits = 32 - numIntBits - numFracBits;
- if (numIntBits && extraBits)
- {
- // Sign extend
- Value* shftAmt = VIMMED1(extraBits);
- vFixed = ASHR(SHL(vFixed, shftAmt), shftAmt);
- }
-
- Value* fVal = VIMMED1(0.0f);
- Value* fFrac = VIMMED1(0.0f);
- if (numIntBits)
- {
- fVal = SI_TO_FP(ASHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name);
- }
-
- if (numFracBits)
- {
- fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty);
- fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name);
- }
-
- return FADD(fVal, fFrac, name);
- }
-
- Value* Builder::VCVT_F32_FIXED_UI(Value* vFloat,
- uint32_t numIntBits,
- uint32_t numFracBits,
- const llvm::Twine& name)
- {
- SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
- Value* fixed = nullptr;
-#if 1 // KNOB_SIM_FAST_MATH? Below works correctly from a precision
- // standpoint...
- {
- fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
- C(_MM_FROUND_TO_NEAREST_INT)),
- mSimdInt32Ty);
- }
-#else
- {
- // Do round to nearest int on fractional bits first
- vFloat = VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
- C(_MM_FROUND_TO_NEAREST_INT));
- vFloat = FMUL(vFloat, VIMMED1(1.0f / float(1 << numFracBits)));
-
- // TODO: Handle INF, NAN, overflow / underflow, etc.
-
- Value* vSgn = FCMP_OLT(vFloat, VIMMED1(0.0f));
- Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty);
- Value* vFixed = AND(vFloatInt, VIMMED1((1 << 23) - 1));
- vFixed = OR(vFixed, VIMMED1(1 << 23));
-
- Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24));
- vExp = SUB(vExp, VIMMED1(127));
-
- Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp);
-
- fixed = LSHR(vFixed, vExtraBits, name);
- }
-#endif
- return fixed;
- }
-
- Value* Builder::VCVT_FIXED_UI_F32(Value* vFixed,
- uint32_t numIntBits,
- uint32_t numFracBits,
- const llvm::Twine& name)
- {
- SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
- uint32_t extraBits = 32 - numIntBits - numFracBits;
- if (numIntBits && extraBits)
- {
- // Sign extend
- Value* shftAmt = VIMMED1(extraBits);
- vFixed = ASHR(SHL(vFixed, shftAmt), shftAmt);
- }
-
- Value* fVal = VIMMED1(0.0f);
- Value* fFrac = VIMMED1(0.0f);
- if (numIntBits)
- {
- fVal = UI_TO_FP(LSHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name);
- }
-
- if (numFracBits)
- {
- fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty);
- fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name);
- }
-
- return FADD(fVal, fFrac, name);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief C functions called by LLVM IR
- //////////////////////////////////////////////////////////////////////////
-
- Value* Builder::VEXTRACTI128(Value* a, Constant* imm8)
- {
- bool flag = !imm8->isZeroValue();
- SmallVector<Constant*, 8> idx;
- for (unsigned i = 0; i < mVWidth / 2; i++)
- {
- idx.push_back(C(flag ? i + mVWidth / 2 : i));
- }
- return VSHUFFLE(a, VUNDEF_I(), ConstantVector::get(idx));
- }
-
- Value* Builder::VINSERTI128(Value* a, Value* b, Constant* imm8)
- {
- bool flag = !imm8->isZeroValue();
- SmallVector<Constant*, 8> idx;
- for (unsigned i = 0; i < mVWidth; i++)
- {
- idx.push_back(C(i));
- }
- Value* inter = VSHUFFLE(b, VUNDEF_I(), ConstantVector::get(idx));
-
- SmallVector<Constant*, 8> idx2;
- for (unsigned i = 0; i < mVWidth / 2; i++)
- {
- idx2.push_back(C(flag ? i : i + mVWidth));
- }
- for (unsigned i = mVWidth / 2; i < mVWidth; i++)
- {
- idx2.push_back(C(flag ? i + mVWidth / 2 : i));
- }
- return VSHUFFLE(a, inter, ConstantVector::get(idx2));
- }
-
- // rdtsc buckets macros
- void Builder::RDTSC_START(Value* pBucketMgr, Value* pId)
- {
- // @todo due to an issue with thread local storage propagation in llvm, we can only safely
- // call into buckets framework when single threaded
- if (KNOB_SINGLE_THREADED)
- {
- std::vector<Type*> args{
- PointerType::get(mInt32Ty, 0), // pBucketMgr
- mInt32Ty // id
- };
-
- FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false);
- Function* pFunc = cast<Function>(
-#if LLVM_VERSION_MAJOR >= 9
- JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy).getCallee());
-#else
- JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy));
-#endif
- if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StartBucket") ==
- nullptr)
- {
- sys::DynamicLibrary::AddSymbol("BucketManager_StartBucket",
- (void*)&BucketManager_StartBucket);
- }
-
- CALL(pFunc, {pBucketMgr, pId});
- }
- }
-
- void Builder::RDTSC_STOP(Value* pBucketMgr, Value* pId)
- {
- // @todo due to an issue with thread local storage propagation in llvm, we can only safely
- // call into buckets framework when single threaded
- if (KNOB_SINGLE_THREADED)
- {
- std::vector<Type*> args{
- PointerType::get(mInt32Ty, 0), // pBucketMgr
- mInt32Ty // id
- };
-
- FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false);
- Function* pFunc = cast<Function>(
-#if LLVM_VERSION_MAJOR >= 9
- JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy).getCallee());
-#else
- JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy));
-#endif
- if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StopBucket") ==
- nullptr)
- {
- sys::DynamicLibrary::AddSymbol("BucketManager_StopBucket",
- (void*)&BucketManager_StopBucket);
- }
-
- CALL(pFunc, {pBucketMgr, pId});
- }
- }
-
- uint32_t Builder::GetTypeSize(Type* pType)
- {
- if (pType->isStructTy())
- {
- uint32_t numElems = pType->getStructNumElements();
- Type* pElemTy = pType->getStructElementType(0);
- return numElems * GetTypeSize(pElemTy);
- }
-
- if (pType->isArrayTy())
- {
- uint32_t numElems = pType->getArrayNumElements();
- Type* pElemTy = pType->getArrayElementType();
- return numElems * GetTypeSize(pElemTy);
- }
-
- if (pType->isIntegerTy())
- {
- uint32_t bitSize = pType->getIntegerBitWidth();
- return bitSize / 8;
- }
-
- if (pType->isFloatTy())
- {
- return 4;
- }
-
- if (pType->isHalfTy())
- {
- return 2;
- }
-
- if (pType->isDoubleTy())
- {
- return 8;
- }
-
- SWR_ASSERT(false, "Unimplemented type.");
- return 0;
- }
-} // namespace SwrJit
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
deleted file mode 100644
index a7d69eaf9d0..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file builder_misc.h
- *
- * @brief miscellaneous builder functions
- *
- * Notes:
- *
- ******************************************************************************/
-#pragma once
-
-Constant* C(bool i);
-Constant* C(char i);
-Constant* C(uint8_t i);
-Constant* C(int i);
-Constant* C(int64_t i);
-Constant* C(uint64_t i);
-Constant* C(uint16_t i);
-Constant* C(uint32_t i);
-Constant* C(float i);
-
-/// Build a constant vector from a brace-enclosed list. Each entry is cast to
-/// Ty and converted through the matching scalar C() overload.
-template <typename Ty>
-Constant* C(const std::initializer_list<Ty>& constList)
-{
-    std::vector<Constant*> elements;
-    for (auto it = constList.begin(); it != constList.end(); ++it)
-    {
-        elements.push_back(C((Ty)*it));
-    }
-    return ConstantVector::get(elements);
-}
-
-/// Build a constant vector from a std::vector. Each entry is cast to Ty and
-/// converted through the matching scalar C() overload.
-template <typename Ty>
-Constant* C(const std::vector<Ty>& constList)
-{
-    std::vector<Constant*> elements;
-    for (auto it = constList.begin(); it != constList.end(); ++it)
-    {
-        elements.push_back(C((Ty)*it));
-    }
-    return ConstantVector::get(elements);
-}
-
-/// Wrap ConstantDataArray::get for the given context and element list.
-template <typename Ty>
-Constant* CA(LLVMContext& ctx, ArrayRef<Ty> constList)
-{
-    Constant* pArray = ConstantDataArray::get(ctx, constList);
-    return pArray;
-}
-
-/// Build a constant vector of 'count' entries holding base, base + 1, ...,
-/// each cast to Ty before conversion through the scalar C() overloads.
-template <typename Ty>
-Constant* CInc(uint32_t base, uint32_t count)
-{
-    std::vector<Constant*> elements;
-
-    for (uint32_t step = 0; step < count; ++step)
-    {
-        // unsigned addition wraps exactly like the repeated increment would
-        elements.push_back(C((Ty)(base + step)));
-    }
-    return ConstantVector::get(elements);
-}
-
-Constant* PRED(bool pred);
-
-Value* VIMMED1(uint64_t i);
-Value* VIMMED1_16(uint64_t i);
-
-Value* VIMMED1(int i);
-Value* VIMMED1_16(int i);
-
-Value* VIMMED1(uint32_t i);
-Value* VIMMED1_16(uint32_t i);
-
-Value* VIMMED1(float i);
-Value* VIMMED1_16(float i);
-
-Value* VIMMED1(bool i);
-Value* VIMMED1_16(bool i);
-
-Value* VUNDEF(Type* t);
-
-Value* VUNDEF_F();
-Value* VUNDEF_F_16();
-
-Value* VUNDEF_I();
-Value* VUNDEF_I_16();
-
-Value* VUNDEF(Type* ty, uint32_t size);
-
-Value* VUNDEF_IPTR();
-
-Value* VBROADCAST(Value* src, const llvm::Twine& name = "");
-Value* VBROADCAST_16(Value* src);
-
-Value* VRCP(Value* va, const llvm::Twine& name = "");
-Value* VPLANEPS(Value* vA, Value* vB, Value* vC, Value*& vX, Value*& vY);
-
-uint32_t IMMED(Value* i);
-int32_t S_IMMED(Value* i);
-
-CallInst* CALL(Value* Callee, const std::initializer_list<Value*>& args, const llvm::Twine& name = "");
-/// Zero-argument call: forwards Callee to CALLA. On LLVM >= 11 the callee is
-/// first cast to Function and wrapped in a FunctionCallee.
-CallInst* CALL(Value* Callee)
-{
-#if LLVM_VERSION_MAJOR >= 11
- // Not a great idea - we lose type info (Function) calling CALL
- // and then we recast it here. Good for now, but needs to be
- // more clean - optimally just always CALL a Function
- return CALLA(FunctionCallee(cast<Function>(Callee)));
-#else
- return CALLA(Callee);
-#endif
-}
-CallInst* CALL(Value* Callee, Value* arg);
-CallInst* CALL2(Value* Callee, Value* arg1, Value* arg2);
-CallInst* CALL3(Value* Callee, Value* arg1, Value* arg2, Value* arg3);
-
-Value* MASK(Value* vmask);
-Value* MASK_16(Value* vmask);
-
-Value* VMASK(Value* mask);
-Value* VMASK_16(Value* mask);
-
-Value* VMOVMSK(Value* mask);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Float / Fixed-point conversions
-//////////////////////////////////////////////////////////////////////////
-// Signed
-Value* VCVT_F32_FIXED_SI(Value* vFloat,
- uint32_t numIntBits,
- uint32_t numFracBits,
- const llvm::Twine& name = "");
-Value* VCVT_FIXED_SI_F32(Value* vFixed,
- uint32_t numIntBits,
- uint32_t numFracBits,
- const llvm::Twine& name = "");
-// Unsigned
-Value* VCVT_F32_FIXED_UI(Value* vFloat,
- uint32_t numIntBits,
- uint32_t numFracBits,
- const llvm::Twine& name = "");
-Value* VCVT_FIXED_UI_F32(Value* vFixed,
- uint32_t numIntBits,
- uint32_t numFracBits,
- const llvm::Twine& name = "");
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief functions that build IR to call x86 intrinsics directly, or
-/// emulate them with other instructions if not available on the host
-//////////////////////////////////////////////////////////////////////////
-
-Value* EXTRACT_16(Value* x, uint32_t imm);
-Value* JOIN_16(Value* a, Value* b);
-
-Value* PSHUFB(Value* a, Value* b);
-Value* PMOVSXBD(Value* a);
-Value* PMOVSXWD(Value* a);
-Value* CVTPH2PS(Value* a, const llvm::Twine& name = "");
-Value* CVTPS2PH(Value* a, Value* rounding);
-Value* PMAXSD(Value* a, Value* b);
-Value* PMINSD(Value* a, Value* b);
-Value* PMAXUD(Value* a, Value* b);
-Value* PMINUD(Value* a, Value* b);
-Value* VABSPS(Value* a);
-Value* FMADDPS(Value* a, Value* b, Value* c);
-
-Value* ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name = "");
-Value* FCLAMP(Value* src, Value* low, Value* high);
-Value* FCLAMP(Value* src, float low, float high);
-
-CallInst* PRINT(const std::string& printStr);
-CallInst* PRINT(const std::string& printStr, const std::initializer_list<Value*>& printArgs);
-
-Value* VPOPCNT(Value* a);
-
-/// Convenience alias: returns whatever DEBUGTRAP() returns.
-Value* INT3()
-{
- return DEBUGTRAP();
-}
-
-
-Value* VEXTRACTI128(Value* a, Constant* imm8);
-Value* VINSERTI128(Value* a, Value* b, Constant* imm8);
-
-// rdtsc buckets macros
-void RDTSC_START(Value* pBucketMgr, Value* pId);
-void RDTSC_STOP(Value* pBucketMgr, Value* pId);
-
-Value* CreateEntryAlloca(Function* pFunc, Type* pType);
-Value* CreateEntryAlloca(Function* pFunc, Type* pType, Value* pArraySize);
-
-uint32_t GetTypeSize(Type* pType);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
deleted file mode 100644
index bd5f7588c91..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ /dev/null
@@ -1,2332 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file fetch_jit.cpp
- *
- * @brief Implementation of the fetch jitter
- *
- * Notes:
- *
- ******************************************************************************/
-#include "jit_pch.hpp"
-#include "builder_gfx_mem.h"
-#include "jit_api.h"
-#include "fetch_jit.h"
-#include "gen_state_llvm.h"
-#include "functionpasses/passes.h"
-
-//#define FETCH_DUMP_VERTEX 1
-using namespace llvm;
-using namespace SwrJit;
-
-bool isComponentEnabled(ComponentEnable enableMask, uint8_t component);
-
-// Conversion selector carried in the Shuffle8bpcArgs / Shuffle16bpcArgs tuples
-// of FetchJit. The per-value behaviour is implemented by the Shuffle* helpers,
-// which are not defined in this part of the file.
-enum ConversionType
-{
- CONVERT_NONE,
- CONVERT_NORMALIZED,
- CONVERT_USCALED,
- CONVERT_SSCALED,
- CONVERT_SFIXED,
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// Interface to Jitting a fetch shader
-///
-/// Builds on BuilderGfxMem. Create() is the entry point; the remaining
-/// members are helpers used while generating the fetch function.
-//////////////////////////////////////////////////////////////////////////
-struct FetchJit : public BuilderGfxMem
-{
- FetchJit(JitManager* pJitMgr) : BuilderGfxMem(pJitMgr), mpFetchInfo(NULL) {}
-
- // Generates the fetch function for the given compile state and returns it.
- Function* Create(const FETCH_COMPILE_STATE& fetchState);
-
- // Index loaders used by Create() when the index out-of-bounds check is not
- // disabled; one per index width.
- Value* GetSimdValid32bitIndices(Value* vIndices, Value* pLastIndex);
- Value* GetSimdValid16bitIndices(Value* vIndices, Value* pLastIndex);
- Value* GetSimdValid8bitIndices(Value* vIndices, Value* pLastIndex);
- template <typename T>
- Value* GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex);
-
- // package up Shuffle8bpcGatherd* args into a tuple for convenience
- typedef std::tuple<Value*&,
- Value*,
- const Instruction::CastOps,
- const ConversionType,
- uint32_t&,
- uint32_t&,
- const ComponentEnable,
- const ComponentControl (&)[4],
- Value* (&)[4],
- const uint32_t (&)[4]>
- Shuffle8bpcArgs;
-
- void Shuffle8bpcGatherd16(Shuffle8bpcArgs& args);
- void Shuffle8bpcGatherd(Shuffle8bpcArgs& args);
-
- // same idea for the 16bpc shuffles; the first element is a pair of gather
- // results rather than a single value
- typedef std::tuple<Value* (&)[2],
- Value*,
- const Instruction::CastOps,
- const ConversionType,
- uint32_t&,
- uint32_t&,
- const ComponentEnable,
- const ComponentControl (&)[4],
- Value* (&)[4]>
- Shuffle16bpcArgs;
-
- void Shuffle16bpcGather16(Shuffle16bpcArgs& args);
- void Shuffle16bpcGather(Shuffle16bpcArgs& args);
-
- void StoreVertexElements(Value* pVtxOut,
- const uint32_t outputElt,
- const uint32_t numEltsToStore,
- Value* (&vVertexElements)[4]);
-
- Value* GenerateCompCtrlVector(const ComponentControl ctrl);
-
- void JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
- Value* streams,
- Value* vIndices,
- Value* pVtxOut);
-
- // true when the first component is not 8/16/32/64 bits wide
- bool IsOddFormat(SWR_FORMAT format);
- // true when every component has the same bit width and type
- bool IsUniformFormat(SWR_FORMAT format);
- void UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[4]);
- void CreateGatherOddFormats(
- SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]);
- void ConvertFormat(SWR_FORMAT format, Value* texels[4]);
-
- // fetch-info argument of the function being generated; assigned in Create()
- Value* mpFetchInfo;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Generates the IR for a fetch function and runs the optimization
-///        passes over it.
-/// @param fetchState - compile state; its CRC becomes part of the function
-///        name and its flags select which code paths are emitted
-/// @return the generated function, created in JM()->mpCurrentModule
-Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
-{
- // ios_base::ate positions the stream at the end so the CRC is appended to "FCH_"
- std::stringstream fnName("FCH_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
- fnName << ComputeCRC(0, &fetchState, sizeof(fetchState));
-
- Function* fetch = Function::Create(
- JM()->mFetchShaderTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
- BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", fetch);
-
- // the module takes the name of the function it holds
- fetch->getParent()->setModuleIdentifier(fetch->getName());
-
- IRB()->SetInsertPoint(entry);
-
- auto argitr = fetch->arg_begin();
-
- // Fetch shader arguments
- // consumed in order: privateContext, pWorkerData, fetchInfo, vtxOutput
- Value* privateContext = &*argitr;
- ++argitr;
- privateContext->setName("privateContext");
- SetPrivateContext(privateContext);
-
- mpWorkerData = &*argitr;
- ++argitr;
- mpWorkerData->setName("pWorkerData");
-
- mpFetchInfo = &*argitr;
- ++argitr;
- mpFetchInfo->setName("fetchInfo");
- Value* pVtxOut = &*argitr;
- pVtxOut->setName("vtxOutput");
-
- // only read again when USE_SIMD16_SHADERS restores the width at the end
- uint32_t baseWidth = mVWidth;
-
- SWR_ASSERT(mVWidth == 8 || mVWidth == 16, "Unsupported vector width %d", mVWidth);
-
- // Override builder target width to force 16-wide SIMD
-#if USE_SIMD16_SHADERS
- SetTargetWidth(16);
-#endif
-
- pVtxOut = BITCAST(pVtxOut, PointerType::get(mSimdFP32Ty, 0));
-
- // SWR_FETCH_CONTEXT::pStreams
- Value* streams = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_pStreams});
- streams->setName("pStreams");
-
- // SWR_FETCH_CONTEXT::pIndices
- Value* indices = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_xpIndices});
- indices->setName("pIndices");
-
- // SWR_FETCH_CONTEXT::pLastIndex
- Value* pLastIndex = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_xpLastIndex});
- pLastIndex->setName("pLastIndex");
-
- // Load one SIMD of indices. With bDisableIndexOOBCheck the indices are loaded
- // directly (8/16-bit ones zero-extended to 32 bits); otherwise the
- // GetSimdValid*Indices helpers are used.
- Value* vIndices;
- switch (fetchState.indexType)
- {
- case R8_UINT:
- indices = BITCAST(indices, Type::getInt8PtrTy(JM()->mContext, 0));
- if (fetchState.bDisableIndexOOBCheck)
- {
- vIndices = LOAD(
- BITCAST(indices, PointerType::get(getVectorType(mInt8Ty, mpJitMgr->mVWidth), 0)),
- {(uint32_t)0});
- vIndices = Z_EXT(vIndices, mSimdInt32Ty);
- }
- else
- {
- vIndices = GetSimdValid8bitIndices(indices, pLastIndex);
- }
- break;
- case R16_UINT:
- if (fetchState.bDisableIndexOOBCheck)
- {
- vIndices = LOAD(
- BITCAST(indices, PointerType::get(getVectorType(mInt16Ty, mpJitMgr->mVWidth), 0)),
- {(uint32_t)0});
- vIndices = Z_EXT(vIndices, mSimdInt32Ty);
- }
- else
- {
- vIndices = GetSimdValid16bitIndices(indices, pLastIndex);
- }
- break;
- case R32_UINT:
- // conditional expression used as a statement: exactly one assignment runs
- (fetchState.bDisableIndexOOBCheck)
- ? vIndices = LOAD(indices,
- "",
- PointerType::get(mSimdInt32Ty, 0),
- MEM_CLIENT::GFX_MEM_CLIENT_FETCH)
- : vIndices = GetSimdValid32bitIndices(indices, pLastIndex);
- break; // incoming type is already 32bit int
- default:
- vIndices = nullptr;
- assert(false && "Unsupported index type");
- break;
- }
-
- if (fetchState.bForceSequentialAccessEnable)
- {
- // per-lane offsets 0..mVWidth-1
- Value* pOffsets = mVWidth == 8 ? C({0, 1, 2, 3, 4, 5, 6, 7})
- : C({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
-
- // VertexData buffers are accessed sequentially, the index is equal to the vertex number
- vIndices = VBROADCAST(LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex}));
- vIndices = ADD(vIndices, pOffsets);
- }
-
- Value* vVertexId = vIndices;
- if (fetchState.bVertexIDOffsetEnable)
- {
- // Assuming one of baseVertex or startVertex is 0, so adding both should be functionally
- // correct
- Value* vBaseVertex = VBROADCAST(LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_BaseVertex}));
- Value* vStartVertex = VBROADCAST(LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex}));
- vVertexId = ADD(vIndices, vBaseVertex);
- vVertexId = ADD(vVertexId, vStartVertex);
- }
-
- // store out vertex IDs
- if (mVWidth == 16)
- {
- // store out in simd8 halves until core supports 16-wide natively
- auto vVertexIdLo = EXTRACT_16(vVertexId, 0);
- auto vVertexIdHi = EXTRACT_16(vVertexId, 1);
- STORE(vVertexIdLo, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID}));
- STORE(vVertexIdHi, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID2}));
- }
- else if (mVWidth == 8)
- {
- STORE(vVertexId, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID}));
- }
-
- // store out cut mask if enabled
- if (fetchState.bEnableCutIndex)
- {
- // a lane is set in the mask when its index equals the cut index
- Value* vCutIndex = VIMMED1(fetchState.cutIndex);
- Value* cutMask = VMASK(ICMP_EQ(vIndices, vCutIndex));
-
- if (mVWidth == 16)
- {
- auto cutMaskLo = EXTRACT_16(cutMask, 0);
- auto cutMaskHi = EXTRACT_16(cutMask, 1);
- STORE(cutMaskLo, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CutMask}));
- STORE(cutMaskHi, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CutMask2}));
- }
- else if (mVWidth == 8)
- {
- STORE(cutMask, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CutMask}));
- }
- }
-
- // Fetch attributes from memory and output to a simdvertex struct
- JitGatherVertices(fetchState, streams, vIndices, pVtxOut);
-
- RET_VOID();
-
- JitManager::DumpToFile(fetch, "src");
-
-#if defined(_DEBUG)
- verifyFunction(*fetch);
-#endif
-
- ::FunctionPassManager setupPasses(JM()->mpCurrentModule);
-
- ///@todo We don't need the CFG passes for fetch. (e.g. BreakCriticalEdges and CFGSimplification)
- setupPasses.add(createBreakCriticalEdgesPass());
- setupPasses.add(createCFGSimplificationPass());
- setupPasses.add(createEarlyCSEPass());
- setupPasses.add(createPromoteMemoryToRegisterPass());
-
- setupPasses.run(*fetch);
-
- JitManager::DumpToFile(fetch, "se");
-
- ::FunctionPassManager optPasses(JM()->mpCurrentModule);
-
- ///@todo Haven't touched these either. Need to remove some of these and add others.
- optPasses.add(createCFGSimplificationPass());
- optPasses.add(createEarlyCSEPass());
- optPasses.add(createInstructionCombiningPass());
-#if LLVM_VERSION_MAJOR <= 11
- optPasses.add(createConstantPropagationPass());
-#endif
- optPasses.add(createSCCPPass());
- optPasses.add(createAggressiveDCEPass());
-
- optPasses.run(*fetch);
-
- // NOTE(review): the LowerX86 pass is appended to the same manager, so the
- // second run() repeats the passes above before lowering - confirm intended
- optPasses.add(createLowerX86Pass(this));
- optPasses.run(*fetch);
-
- JitManager::DumpToFile(fetch, "opt");
-
-
- // Revert 16-wide override
-#if USE_SIMD16_SHADERS
- SetTargetWidth(baseWidth);
-#endif
-
- return fetch;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Reports whether a format is "odd", i.e. needs special gather
-///        handling.
-/// @param format - format to classify
-/// @return true when the first component is not 8, 16, 32 or 64 bits wide
-bool FetchJit::IsOddFormat(SWR_FORMAT format)
-{
-    // only the width of component 0 is examined
-    switch (GetFormatInfo(format).bpc[0])
-    {
-    case 8:
-    case 16:
-    case 32:
-    case 64:
-        return false;
-    default:
-        return true;
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Reports whether every component of a format matches component 0
-///        in both bit width and type.
-/// @param format - format to classify
-/// @return true when all components share one size and type
-bool FetchJit::IsUniformFormat(SWR_FORMAT format)
-{
-    const SWR_FORMAT_INFO& fmt = GetFormatInfo(format);
-
-    const uint32_t refBits = fmt.bpc[0];
-    const uint32_t refType = fmt.type[0];
-
-    for (uint32_t comp = 1; comp < fmt.numComps; ++comp)
-    {
-        const bool sameBits = (refBits == fmt.bpc[comp]);
-        const bool sameType = (refType == fmt.type[comp]);
-        if (!(sameBits && sameType))
-        {
-            return false;
-        }
-    }
-    return true;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Splits packed pixels into one value per component.
-///        For each component of the format, in storage order:
-///          - mask off everything but this component
-///          - shift the component down to the LSB
-///          - store it in result[] at the component's swizzled index
-/// @param format - format describing component widths and swizzle
-/// @param vInput - packed pixel data
-/// @param result - receives the unpacked components; entries not named by
-///        the format's swizzle are left untouched
-void FetchJit::UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[4])
-{
-    const SWR_FORMAT_INFO& info = GetFormatInfo(format);
-
-    uint32_t bitOffset = 0;
-    for (uint32_t c = 0; c < info.numComps; ++c)
-    {
-        uint32_t swizzledIndex = info.swizzle[c];
-        uint32_t compBits      = info.bpc[c];
-
-        // Build the mask in unsigned arithmetic: a signed "1 << n" is undefined
-        // for n == 32 and shifts into the sign bit for masks reaching bit 31.
-        uint32_t compMask = (compBits >= 32) ? 0xFFFFFFFFu : ((1u << compBits) - 1u);
-        uint32_t bitmask  = compMask << bitOffset;
-
-        Value* comp = AND(vInput, bitmask);
-        comp        = LSHR(comp, bitOffset);
-
-        result[swizzledIndex] = comp;
-        bitOffset += compBits;
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Gather for odd component size formats.
-///        Gathers one full pixel per SIMD lane, then shifts/masks each
-///        component into its own vector and bitcasts the results to fp32.
-/// @param format - format of the pixels; its bpp must be <= 32
-/// @param pMask - per-lane mask; only lanes with the mask set are loaded
-/// @param xpBase - base address the per-lane offsets are added to
-/// @param pOffsets - per-lane byte offsets from xpBase
-/// @param pResult - receives the four component vectors; components not
-///        present in the format keep the format's default value
-void FetchJit::CreateGatherOddFormats(
-    SWR_FORMAT format, Value* pMask, Value* xpBase, Value* pOffsets, Value* pResult[4])
-{
-    const SWR_FORMAT_INFO& info = GetFormatInfo(format);
-
-    // only works if pixel size is <= 32bits
-    SWR_ASSERT(info.bpp <= 32);
-
-    Value* pGather;
-    if (info.bpp == 32)
-    {
-        pGather =
-            GATHERDD(VIMMED1(0), xpBase, pOffsets, pMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
-    }
-    else
-    {
-        // Can't use 32-bit gather for items less than 32-bits, could cause page faults.
-        // Instead each lane is loaded individually into a zero-initialized temporary.
-        Value* pMem = ALLOCA(mSimdInt32Ty);
-        STORE(VIMMED1(0u), pMem);
-
-        Value* pDstMem = POINTER_CAST(pMem, mInt32PtrTy);
-
-        for (uint32_t lane = 0; lane < mVWidth; ++lane)
-        {
-            // Get index
-            Value* index = VEXTRACT(pOffsets, C(lane));
-            Value* mask  = VEXTRACT(pMask, C(lane));
-
-            // use branch around load based on mask
-            // Needed to avoid page-faults on lanes whose mask is not set
-            BasicBlock* pCurrentBB = IRB()->GetInsertBlock();
-            BasicBlock* pMaskedLoadBlock =
-                BasicBlock::Create(JM()->mContext, "MaskedLaneLoad", pCurrentBB->getParent());
-            BasicBlock* pEndLoadBB =
-                BasicBlock::Create(JM()->mContext, "AfterMaskedLoad", pCurrentBB->getParent());
-
-            COND_BR(mask, pMaskedLoadBlock, pEndLoadBB);
-
-            JM()->mBuilder.SetInsertPoint(pMaskedLoadBlock);
-
-            switch (info.bpp)
-            {
-            case 8:
-            {
-                Value* pDst  = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt8Ty, 0));
-                Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
-                STORE(LOAD(xpSrc, "", mInt8PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
-                break;
-            }
-
-            case 16:
-            {
-                Value* pDst  = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0));
-                Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
-                STORE(LOAD(xpSrc, "", mInt16PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
-                break;
-            }
-
-            case 24:
-            {
-                // First 16-bits of data
-                Value* pDst  = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0));
-                Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType()));
-                STORE(LOAD(xpSrc, "", mInt16PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
-
-                // Last 8-bits of data
-                // pDst is a 16-bit pointer here, so element 1 is two bytes further on
-                pDst  = BITCAST(GEP(pDst, C(1)), PointerType::get(mInt8Ty, 0));
-                xpSrc = ADD(xpSrc, C((int64_t)2));
-                STORE(LOAD(xpSrc, "", mInt8PtrTy, MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst);
-                break;
-            }
-
-            default:
-                SWR_INVALID("Shouldn't have BPP = %d now", info.bpp);
-                break;
-            }
-
-            BR(pEndLoadBB);
-            JM()->mBuilder.SetInsertPoint(pEndLoadBB);
-        }
-
-        pGather = LOAD(pMem);
-    }
-
-    // start from the format defaults; UnpackComponents overwrites the
-    // components the format actually provides
-    for (uint32_t comp = 0; comp < 4; ++comp)
-    {
-        pResult[comp] = VIMMED1((int)info.defaults[comp]);
-    }
-
-    UnpackComponents(format, pGather, pResult);
-
-    // cast to fp32
-    pResult[0] = BITCAST(pResult[0], mSimdFP32Ty);
-    pResult[1] = BITCAST(pResult[1], mSimdFP32Ty);
-    pResult[2] = BITCAST(pResult[2], mSimdFP32Ty);
-    pResult[3] = BITCAST(pResult[3], mSimdFP32Ty);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Converts the normalized components of a format to float, in place.
-///        Unused components and components that are not normalized are left
-///        untouched.
-/// @param format - format describing each component
-/// @param texels - component vectors, indexed by swizzled component
-void FetchJit::ConvertFormat(SWR_FORMAT format, Value* texels[4])
-{
-    const SWR_FORMAT_INFO& fmt = GetFormatInfo(format);
-
-    for (uint32_t comp = 0; comp < fmt.numComps; ++comp)
-    {
-        // skip any conversion on UNUSED components, and on anything not normalized
-        if (fmt.type[comp] == SWR_TYPE_UNUSED || !fmt.isNormalized[comp])
-        {
-            continue;
-        }
-
-        Value*&        texel   = texels[fmt.swizzle[comp]];
-        const uint32_t numBits = fmt.bpc[comp];
-
-        if (fmt.type[comp] == SWR_TYPE_SNORM)
-        {
-            /// @todo The most-negative value maps to -1.0f. e.g. the 5-bit value 10000 maps to
-            /// -1.0f.
-
-            /// result = c * (1.0f / (2^(n-1) - 1);
-            const uint32_t range  = 1 << (numBits - 1);
-            const float    scale  = 1.0f / (float)(range - 1);
-            Value*         vScale = VIMMED1(scale);
-
-            texel = BITCAST(texel, mSimdInt32Ty);
-            texel = SI_TO_FP(texel, mSimdFP32Ty);
-            texel = FMUL(texel, vScale);
-            continue;
-        }
-
-        SWR_ASSERT(fmt.type[comp] == SWR_TYPE_UNORM);
-
-        /// result = c * (1.0f / (2^n - 1))
-        const uint32_t range = 1 << numBits;
-
-        if (numBits == 24)
-        {
-            // special case 24bit unorm format, which requires a full divide to meet ULP
-            // requirement
-            const float scale  = (float)(range - 1);
-            Value*      vScale = VIMMED1(scale);
-
-            texel = BITCAST(texel, mSimdInt32Ty);
-            texel = SI_TO_FP(texel, mSimdFP32Ty);
-            texel = FDIV(texel, vScale);
-        }
-        else
-        {
-            const float scale  = 1.0f / (float)(range - 1);
-            Value*      vScale = VIMMED1(scale);
-
-            texel = BITCAST(texel, mSimdInt32Ty);
-            texel = UI_TO_FP(texel, mSimdFP32Ty);
-            texel = FMUL(texel, vScale);
-        }
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Loads attributes from memory using AVX2 GATHER(s)
-/// @param fetchState - info about attributes to be fetched from memory
-/// @param streams - value pointer to the current vertex stream
-/// @param vIndices - vector value of indices to gather
-/// @param pVtxOut - value pointer to output simdvertex struct
-void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
- Value* streams,
- Value* vIndices,
- Value* pVtxOut)
-{
- uint32_t currentVertexElement = 0;
- uint32_t outputElt = 0;
- Value* vVertexElements[4];
-
- Value* startVertex = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex});
- Value* startInstance = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartInstance});
- Value* curInstance = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CurInstance});
- Value* vBaseVertex = VBROADCAST(LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_BaseVertex}));
- curInstance->setName("curInstance");
-
- for (uint32_t nInputElt = 0; nInputElt < fetchState.numAttribs; nInputElt += 1)
- {
- const INPUT_ELEMENT_DESC& ied = fetchState.layout[nInputElt];
-
- // skip element if all components are disabled
- if (ied.ComponentPacking == ComponentEnable::NONE)
- {
- continue;
- }
-
- const SWR_FORMAT_INFO& info = GetFormatInfo((SWR_FORMAT)ied.Format);
- SWR_ASSERT((info.bpp != 0), "Unsupported format in JitGatherVertices.");
- uint32_t bpc =
- info.bpp /
- info.numComps; ///@todo Code below assumes all components are same size. Need to fix.
-
- Value* stream = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_xpData});
-
- Value* stride = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_pitch});
- Value* vStride = VBROADCAST(stride);
-
- // max vertex index that is fully in bounds
- Value* maxVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_maxVertex)});
- maxVertex = LOAD(maxVertex);
-
- Value* minVertex = NULL;
- if (fetchState.bPartialVertexBuffer)
- {
- // min vertex index for low bounds OOB checking
- minVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_minVertex)});
- minVertex = LOAD(minVertex);
- }
-
- if (fetchState.bInstanceIDOffsetEnable)
- {
- // the InstanceID (curInstance) value is offset by StartInstanceLocation
- curInstance = ADD(curInstance, startInstance);
- }
-
- Value* vCurIndices;
- Value* startOffset;
- Value* vInstanceStride = VIMMED1(0);
-
- if (ied.InstanceEnable)
- {
- Value* stepRate = C(ied.InstanceAdvancementState);
-
- // prevent a div by 0 for 0 step rate
- Value* isNonZeroStep = ICMP_UGT(stepRate, C(0));
- stepRate = SELECT(isNonZeroStep, stepRate, C(1));
-
- // calc the current offset into instanced data buffer
- Value* calcInstance = UDIV(curInstance, stepRate);
-
- // if step rate is 0, every instance gets instance 0
- calcInstance = SELECT(isNonZeroStep, calcInstance, C(0));
-
- vCurIndices = VBROADCAST(calcInstance);
- startOffset = startInstance;
- }
- else if (ied.InstanceStrideEnable)
- {
- // grab the instance advancement state, determines stride in bytes from one instance to
- // the next
- Value* stepRate = C(ied.InstanceAdvancementState);
- vInstanceStride = VBROADCAST(MUL(curInstance, stepRate));
-
- // offset indices by baseVertex
- vCurIndices = ADD(vIndices, vBaseVertex);
-
- startOffset = startVertex;
- SWR_ASSERT((0), "TODO: Fill out more once driver sends this down.");
- }
- else
- {
- // offset indices by baseVertex
- vCurIndices = ADD(vIndices, vBaseVertex);
- startOffset = startVertex;
- }
-
- // All of the OOB calculations are in vertices, not VB offsets, to prevent having to
- // do 64bit address offset calculations.
-
- // calculate byte offset to the start of the VB
- Value* baseOffset = MUL(Z_EXT(startOffset, mInt64Ty), Z_EXT(stride, mInt64Ty));
-
- // VGATHER* takes an *i8 src pointer so that's what stream is
- Value* pStreamBaseGFX = ADD(stream, baseOffset);
-
- // if we have a start offset, subtract from max vertex. Used for OOB check
- maxVertex = SUB(Z_EXT(maxVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
- Value* maxNeg = ICMP_SLT(maxVertex, C((int64_t)0));
- // if we have a negative value, we're already OOB. clamp at 0.
- maxVertex = SELECT(maxNeg, C(0), TRUNC(maxVertex, mInt32Ty));
-
- if (fetchState.bPartialVertexBuffer)
- {
- // similary for min vertex
- minVertex = SUB(Z_EXT(minVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
- Value* minNeg = ICMP_SLT(minVertex, C((int64_t)0));
- minVertex = SELECT(minNeg, C(0), TRUNC(minVertex, mInt32Ty));
- }
-
- // Load the in bounds size of a partially valid vertex
- Value* partialInboundsSize =
- GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_partialInboundsSize)});
- partialInboundsSize = LOAD(partialInboundsSize);
- Value* vPartialVertexSize = VBROADCAST(partialInboundsSize);
- Value* vBpp = VBROADCAST(C(info.Bpp));
- Value* vAlignmentOffsets = VBROADCAST(C(ied.AlignedByteOffset));
-
- // is the element is <= the partially valid size
- Value* vElementInBoundsMask = ICMP_SLE(vBpp, SUB(vPartialVertexSize, vAlignmentOffsets));
-
- // override cur indices with 0 if pitch is 0
- Value* pZeroPitchMask = ICMP_EQ(vStride, VIMMED1(0));
- vCurIndices = SELECT(pZeroPitchMask, VIMMED1(0), vCurIndices);
-
- // are vertices partially OOB?
- Value* vMaxVertex = VBROADCAST(maxVertex);
- Value* vPartialOOBMask = ICMP_EQ(vCurIndices, vMaxVertex);
-
- // are vertices fully in bounds?
- Value* vMaxGatherMask = ICMP_ULT(vCurIndices, vMaxVertex);
-
- Value* vGatherMask;
- if (fetchState.bPartialVertexBuffer)
- {
- // are vertices below minVertex limit?
- Value* vMinVertex = VBROADCAST(minVertex);
- Value* vMinGatherMask = ICMP_UGE(vCurIndices, vMinVertex);
-
- // only fetch lanes that pass both tests
- vGatherMask = AND(vMaxGatherMask, vMinGatherMask);
- }
- else
- {
- vGatherMask = vMaxGatherMask;
- }
-
- // blend in any partially OOB indices that have valid elements
- vGatherMask = SELECT(vPartialOOBMask, vElementInBoundsMask, vGatherMask);
-
- // calculate the actual offsets into the VB
- Value* vOffsets = MUL(vCurIndices, vStride);
- vOffsets = ADD(vOffsets, vAlignmentOffsets);
-
- // if instance stride enable is:
- // true - add product of the instanceID and advancement state to the offset into the VB
- // false - value of vInstanceStride has been initialized to zero
- vOffsets = ADD(vOffsets, vInstanceStride);
-
- // Packing and component control
- ComponentEnable compMask = (ComponentEnable)ied.ComponentPacking;
- const ComponentControl compCtrl[4]{(ComponentControl)ied.ComponentControl0,
- (ComponentControl)ied.ComponentControl1,
- (ComponentControl)ied.ComponentControl2,
- (ComponentControl)ied.ComponentControl3};
-
- // Special gather/conversion for formats without equal component sizes
- if (IsOddFormat((SWR_FORMAT)ied.Format))
- {
- Value* pResults[4];
- CreateGatherOddFormats(
- (SWR_FORMAT)ied.Format, vGatherMask, pStreamBaseGFX, vOffsets, pResults);
- ConvertFormat((SWR_FORMAT)ied.Format, pResults);
-
- for (uint32_t c = 0; c < 4; c += 1)
- {
- if (isComponentEnabled(compMask, c))
- {
- vVertexElements[currentVertexElement++] = pResults[c];
- if (currentVertexElement > 3)
- {
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
- }
- }
- }
- }
- else if (info.type[0] == SWR_TYPE_FLOAT)
- {
- ///@todo: support 64 bit vb accesses
- Value* gatherSrc = VIMMED1(0.0f);
-
- SWR_ASSERT(IsUniformFormat((SWR_FORMAT)ied.Format),
- "Unsupported format for standard gather fetch.");
-
- // Gather components from memory to store in a simdvertex structure
- switch (bpc)
- {
- case 16:
- {
- Value* vGatherResult[2];
-
- // if we have at least one component out of x or y to fetch
- if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
- {
- vGatherResult[0] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
- // e.g. result of first 8x32bit integer gather for 16bit components
- // 256i - 0 1 2 3 4 5 6 7
- // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
- //
- }
-
- // if we have at least one component out of z or w to fetch
- if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
- {
- // offset base to the next components(zw) in the vertex to gather
- pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4));
-
- vGatherResult[1] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
- // e.g. result of second 8x32bit integer gather for 16bit components
- // 256i - 0 1 2 3 4 5 6 7
- // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
- //
- }
-
- // if we have at least one component to shuffle into place
- if (compMask)
- {
- Shuffle16bpcArgs args = std::forward_as_tuple(vGatherResult,
- pVtxOut,
- Instruction::CastOps::FPExt,
- CONVERT_NONE,
- currentVertexElement,
- outputElt,
- compMask,
- compCtrl,
- vVertexElements);
-
- // Shuffle gathered components into place in simdvertex struct
- mVWidth == 16 ? Shuffle16bpcGather16(args)
- : Shuffle16bpcGather(args); // outputs to vVertexElements ref
- }
- }
- break;
- case 32:
- {
- for (uint32_t i = 0; i < 4; i += 1)
- {
- if (isComponentEnabled(compMask, i))
- {
- // if we need to gather the component
- if (compCtrl[i] == StoreSrc)
- {
- // Gather a SIMD of vertices
- // APIs allow a 4GB range for offsets
- // However, GATHERPS uses signed 32-bit offsets, so +/- 2GB range :(
- // Add 2GB to the base pointer and 2GB to the offsets. This makes
- // "negative" (large) offsets into positive offsets and small offsets
- // into negative offsets.
- Value* vNewOffsets = ADD(vOffsets, VIMMED1(0x80000000));
- vVertexElements[currentVertexElement++] =
- GATHERPS(gatherSrc,
- ADD(pStreamBaseGFX, C((uintptr_t)0x80000000U)),
- vNewOffsets,
- vGatherMask,
- 1,
- MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
- }
- else
- {
- vVertexElements[currentVertexElement++] =
- GenerateCompCtrlVector(compCtrl[i]);
- }
-
- if (currentVertexElement > 3)
- {
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
- }
- }
-
- // offset base to the next component in the vertex to gather
- pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4));
- }
- }
- break;
- case 64:
- {
- for (uint32_t i = 0; i < 4; i += 1)
- {
- if (isComponentEnabled(compMask, i))
- {
- // if we need to gather the component
- if (compCtrl[i] == StoreSrc)
- {
- Value* vShufLo;
- Value* vShufHi;
- Value* vShufAll;
-
- if (mVWidth == 8)
- {
- vShufLo = C({0, 1, 2, 3});
- vShufHi = C({4, 5, 6, 7});
- vShufAll = C({0, 1, 2, 3, 4, 5, 6, 7});
- }
- else
- {
- SWR_ASSERT(mVWidth == 16);
- vShufLo = C({0, 1, 2, 3, 4, 5, 6, 7});
- vShufHi = C({8, 9, 10, 11, 12, 13, 14, 15});
- vShufAll =
- C({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
- }
-
- Value* vMaskLo = VSHUFFLE(vGatherMask, vGatherMask, vShufLo);
- Value* vMaskHi = VSHUFFLE(vGatherMask, vGatherMask, vShufHi);
-
- Value* vOffsetsLo = VSHUFFLE(vOffsets, vOffsets, vShufLo);
- Value* vOffsetsHi = VSHUFFLE(vOffsets, vOffsets, vShufHi);
-
- Value* vZeroDouble = VECTOR_SPLAT(
- mVWidth / 2, ConstantFP::get(IRB()->getDoubleTy(), 0.0f));
-
- Value* pGatherLo =
- GATHERPD(vZeroDouble, pStreamBaseGFX, vOffsetsLo, vMaskLo);
- Value* pGatherHi =
- GATHERPD(vZeroDouble, pStreamBaseGFX, vOffsetsHi, vMaskHi);
-
- Value* pGather = VSHUFFLE(pGatherLo, pGatherHi, vShufAll);
- pGather = FP_TRUNC(pGather, mSimdFP32Ty);
-
- vVertexElements[currentVertexElement++] = pGather;
- }
- else
- {
- vVertexElements[currentVertexElement++] =
- GenerateCompCtrlVector(compCtrl[i]);
- }
-
- if (currentVertexElement > 3)
- {
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
- }
- }
-
- // offset base to the next component in the vertex to gather
- pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)8));
- }
- }
- break;
- default:
- SWR_INVALID("Tried to fetch invalid FP format");
- break;
- }
- }
- else
- {
- Instruction::CastOps extendCastType = Instruction::CastOps::CastOpsEnd;
- ConversionType conversionType = CONVERT_NONE;
-
- SWR_ASSERT(IsUniformFormat((SWR_FORMAT)ied.Format),
- "Unsupported format for standard gather fetch.");
-
- switch (info.type[0])
- {
- case SWR_TYPE_UNORM:
- conversionType = CONVERT_NORMALIZED;
- case SWR_TYPE_UINT:
- extendCastType = Instruction::CastOps::ZExt;
- break;
- case SWR_TYPE_SNORM:
- conversionType = CONVERT_NORMALIZED;
- case SWR_TYPE_SINT:
- extendCastType = Instruction::CastOps::SExt;
- break;
- case SWR_TYPE_USCALED:
- conversionType = CONVERT_USCALED;
- extendCastType = Instruction::CastOps::UIToFP;
- break;
- case SWR_TYPE_SSCALED:
- conversionType = CONVERT_SSCALED;
- extendCastType = Instruction::CastOps::SIToFP;
- break;
- case SWR_TYPE_SFIXED:
- conversionType = CONVERT_SFIXED;
- extendCastType = Instruction::CastOps::SExt;
- break;
- default:
- break;
- }
-
- // value substituted when component of gather is masked
- Value* gatherSrc = VIMMED1(0);
-
- // Gather components from memory to store in a simdvertex structure
- switch (bpc)
- {
- case 8:
- {
- // if we have at least one component to fetch
- if (compMask)
- {
- Value* vGatherResult = GATHERDD(gatherSrc,
- pStreamBaseGFX,
- vOffsets,
- vGatherMask,
- 1,
- MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
- // e.g. result of an 8x32bit integer gather for 8bit components
- // 256i - 0 1 2 3 4 5 6 7
- // xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
-
- Shuffle8bpcArgs args = std::forward_as_tuple(vGatherResult,
- pVtxOut,
- extendCastType,
- conversionType,
- currentVertexElement,
- outputElt,
- compMask,
- compCtrl,
- vVertexElements,
- info.swizzle);
-
- // Shuffle gathered components into place in simdvertex struct
- mVWidth == 16 ? Shuffle8bpcGatherd16(args)
- : Shuffle8bpcGatherd(args); // outputs to vVertexElements ref
- }
- }
- break;
- case 16:
- {
- Value* vGatherResult[2];
-
- // if we have at least one component out of x or y to fetch
- if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
- {
- vGatherResult[0] = GATHERDD(gatherSrc,
- pStreamBaseGFX,
- vOffsets,
- vGatherMask,
- 1,
- MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
- // e.g. result of first 8x32bit integer gather for 16bit components
- // 256i - 0 1 2 3 4 5 6 7
- // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
- //
- }
-
- // if we have at least one component out of z or w to fetch
- if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
- {
- // offset base to the next components(zw) in the vertex to gather
- pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4));
-
- vGatherResult[1] = GATHERDD(gatherSrc,
- pStreamBaseGFX,
- vOffsets,
- vGatherMask,
- 1,
- MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
- // e.g. result of second 8x32bit integer gather for 16bit components
- // 256i - 0 1 2 3 4 5 6 7
- // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
- //
- }
-
- // if we have at least one component to shuffle into place
- if (compMask)
- {
- Shuffle16bpcArgs args = std::forward_as_tuple(vGatherResult,
- pVtxOut,
- extendCastType,
- conversionType,
- currentVertexElement,
- outputElt,
- compMask,
- compCtrl,
- vVertexElements);
-
- // Shuffle gathered components into place in simdvertex struct
- mVWidth == 16 ? Shuffle16bpcGather16(args)
- : Shuffle16bpcGather(args); // outputs to vVertexElements ref
- }
- }
- break;
- case 32:
- {
- // Gathered components into place in simdvertex struct
- for (uint32_t i = 0; i < 4; i++)
- {
- if (isComponentEnabled(compMask, i))
- {
- // if we need to gather the component
- if (compCtrl[i] == StoreSrc)
- {
- Value* pGather = GATHERDD(gatherSrc,
- pStreamBaseGFX,
- vOffsets,
- vGatherMask,
- 1,
- MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
-
- if (conversionType == CONVERT_USCALED)
- {
- pGather = UI_TO_FP(pGather, mSimdFP32Ty);
- }
- else if (conversionType == CONVERT_SSCALED)
- {
- pGather = SI_TO_FP(pGather, mSimdFP32Ty);
- }
- else if (conversionType == CONVERT_SFIXED)
- {
- pGather = FMUL(SI_TO_FP(pGather, mSimdFP32Ty),
- VBROADCAST(C(1 / 65536.0f)));
- }
-
- vVertexElements[currentVertexElement++] = pGather;
-
- // e.g. result of a single 8x32bit integer gather for 32bit components
- // 256i - 0 1 2 3 4 5 6 7
- // xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
- }
- else
- {
- vVertexElements[currentVertexElement++] =
- GenerateCompCtrlVector(compCtrl[i]);
- }
-
- if (currentVertexElement > 3)
- {
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
-
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
- }
- }
-
- // offset base to the next component in the vertex to gather
- pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4));
- }
- }
- break;
- }
- }
- }
-
- // if we have a partially filled vVertexElement struct, output it
- if (currentVertexElement > 0)
- {
- StoreVertexElements(pVtxOut, outputElt++, currentVertexElement, vVertexElements);
- }
-}
-
-
-// Callback type used to turn a gfxptr_t address into a pointer that can be read directly.
-// The index readers in this file call it with a null out_pbNullTileAccessed, cast the result
-// to the index type and dereference it.
-// NOTE(review): the meaning of out_pbNullTileAccessed and the contract for a null return are
-// not visible here - confirm against the implementation of the callback.
-typedef void* (*PFN_TRANSLATEGFXADDRESS_FUNC)(void* pdc, gfxptr_t va, bool* out_pbNullTileAccessed, void* pWorkerData);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Reads vWidth consecutive indices of type T through the address
-///        translation callback and stores each one, converted to uint32_t,
-///        in outIndices. A lane whose address is not below lastIndex is
-///        written as 0 and the callback is not invoked for it.
-/// @param indices - address of the first index
-/// @param lastIndex - address bound; only addresses below it are read
-/// @param vWidth - number of lanes written to outIndices
-/// @param pfnTranslate - callback that maps an address to a readable pointer
-/// @param pdc - forwarded unchanged to pfnTranslate
-/// @param outIndices - destination for vWidth values; asserted non-null
-/// @param pWorkerData - forwarded unchanged to pfnTranslate
-template <typename T>
-void GetSimdValidIndicesGfx(gfxptr_t                     indices,
-                            gfxptr_t                     lastIndex,
-                            uint32_t                     vWidth,
-                            PFN_TRANSLATEGFXADDRESS_FUNC pfnTranslate,
-                            void*                        pdc,
-                            uint32_t*                    outIndices,
-                            void*                        pWorkerData)
-{
-    SWR_ASSERT(outIndices != nullptr);
-
-    gfxptr_t curAddr = indices;
-    for (uint32_t lane = 0; lane != vWidth; ++lane, curAddr += sizeof(T))
-    {
-        // out-of-range lanes keep this default
-        uint32_t laneIndex = 0;
-
-        if (curAddr < lastIndex)
-        {
-            // translate the address, then read one index of type T from it
-            T* pSrc = static_cast<T*>(pfnTranslate(pdc, curAddr, nullptr, pWorkerData));
-            SWR_ASSERT(pSrc != nullptr);
-            laneIndex = *pSrc;
-        }
-
-        outIndices[lane] = laneIndex;
-    }
-}
-
-// Non-template entry point for 8-bit indices: forwards every argument unchanged to
-// GetSimdValidIndicesGfx<uint8_t>, which fills outIndices with vWidth 32-bit values.
-void GetSimdValid8bitIndicesGfx(gfxptr_t indices,
- gfxptr_t lastIndex,
- uint32_t vWidth,
- PFN_TRANSLATEGFXADDRESS_FUNC pfnTranslate,
- void* pdc,
- uint32_t* outIndices,
- void* pWorkerData)
-{
- GetSimdValidIndicesGfx<uint8_t>(indices, lastIndex, vWidth, pfnTranslate, pdc, outIndices, pWorkerData);
-}
-
-// Non-template entry point for 16-bit indices: forwards every argument unchanged to
-// GetSimdValidIndicesGfx<uint16_t>, which fills outIndices with vWidth 32-bit values.
-void GetSimdValid16bitIndicesGfx(gfxptr_t indices,
- gfxptr_t lastIndex,
- uint32_t vWidth,
- PFN_TRANSLATEGFXADDRESS_FUNC pfnTranslate,
- void* pdc,
- uint32_t* outIndices,
- void* pWorkerData)
-{
- GetSimdValidIndicesGfx<uint16_t>(indices, lastIndex, vWidth, pfnTranslate, pdc, outIndices, pWorkerData);
-}
-
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Emits IR that loads one index of type T per SIMD lane and zero
-///        extends it to 32 bits. A lane whose address is not below
-///        pLastIndex loads a 0 stored on the stack instead.
-/// @tparam T - index type; only 1 and 2 byte types are accepted
-/// @param pIndices - address of the first index; asserted to be mInt64Ty
-/// @param pLastIndex - address bound for valid indices; asserted to be mInt64Ty
-/// @return vector of mVWidth 32-bit indices
-template <typename T>
-Value* FetchJit::GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex)
-{
-    SWR_ASSERT(pIndices->getType() == mInt64Ty && pLastIndex->getType() == mInt64Ty,
-               "Function expects gfxptr_t for both input parameters.");
-
-    static_assert(sizeof(T) == sizeof(uint16_t) || sizeof(T) == sizeof(uint8_t),
-                  "Unsupported type for use with GetSimdValidIndicesHelper<T>");
-
-    // The static_assert above leaves exactly two sizes, so no runtime fallback is needed.
-    Type* Ty = (sizeof(T) == sizeof(uint16_t)) ? mInt16PtrTy : mInt8PtrTy;
-
-    Value* vIndices = VUNDEF_I();
-
-    // store 0 index on stack to be used to conditionally load from if index address is OOB
-    Value* pZeroIndex = ALLOCA(Ty->getPointerElementType());
-    STORE(C((T)0), pZeroIndex);
-
-    // The bound is the same for every lane, so convert it to a pointer once up front.
-    Value* pLastIndexPtr = INT_TO_PTR(pLastIndex, Ty);
-
-    // Load a SIMD of index pointers
-    for (int64_t lane = 0; lane < mVWidth; lane++)
-    {
-        // Calculate the address of the requested index
-        Value* pIndex = GEP(pIndices, C(lane), Ty);
-
-        // check if the address is less than the max index,
-        Value* mask = ICMP_ULT(pIndex, pLastIndexPtr);
-
-        // if valid, load the index. if not, load 0 from the stack
-        Value* pValid = SELECT(mask, pIndex, pZeroIndex);
-        Value* index  = LOAD(pValid, "valid index", Ty, MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
-
-        // zero extended index to 32 bits and insert into the correct simd lane
-        index    = Z_EXT(index, mInt32Ty);
-        vIndices = VINSERT(vIndices, index, lane);
-    }
-
-    return vIndices;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Loads a simd of valid indices. OOB indices are set to 0
-/// *Note* have to do 8bit index checking in scalar until we have AVX-512
-/// support
-/// Thin wrapper: all work is done by GetSimdValidIndicesHelper<uint8_t>.
-/// @param pIndices - address of the 8 bit indices; the helper asserts this is
-/// a 64-bit integer (gfxptr_t) value rather than an LLVM pointer
-/// @param pLastIndex - address bound for valid indices, same type requirement;
-/// only addresses below it are loaded
-/// @return vector of indices zero extended to 32 bits
-Value* FetchJit::GetSimdValid8bitIndices(Value* pIndices, Value* pLastIndex)
-{
- return GetSimdValidIndicesHelper<uint8_t>(pIndices, pLastIndex);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Loads a simd of valid indices. OOB indices are set to 0
-/// *Note* have to do 16bit index checking in scalar until we have AVX-512
-/// support
-/// Thin wrapper: all work is done by GetSimdValidIndicesHelper<uint16_t>.
-/// @param pIndices - address of the 16 bit indices; the helper asserts this is
-/// a 64-bit integer (gfxptr_t) value rather than an LLVM pointer
-/// @param pLastIndex - address bound for valid indices, same type requirement;
-/// only addresses below it are loaded
-/// @return vector of indices zero extended to 32 bits
-Value* FetchJit::GetSimdValid16bitIndices(Value* pIndices, Value* pLastIndex)
-{
- return GetSimdValidIndicesHelper<uint16_t>(pIndices, pLastIndex);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Loads a simd of valid indices. OOB indices are set to 0
-/// @param pIndices - address of the first 32 bit index; it is subtracted
-///                   from pLastIndex directly, so both are used as integers
-/// @param pLastIndex - address bound for valid indices
-/// @return result of a masked load of mVWidth 32-bit indices, with 0 in
-///         every lane at or beyond the number of indices left
-Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
-{
-    // get the number of indices left in the buffer (endPtr - curPtr) / sizeof(index)
-    // The count is truncated to 32 bits and divided as a signed value, so a bound that lies
-    // before pIndices gives a negative count and masks off every lane below.
-    Value* numIndicesLeft = SUB(pLastIndex, pIndices);
-    numIndicesLeft        = TRUNC(numIndicesLeft, mInt32Ty);
-    numIndicesLeft        = SDIV(numIndicesLeft, C(4));
-
-    // create a vector of index counts from the base index ptr passed into the fetch
-    Constant* vIndexOffsets;
-    if (mVWidth == 8)
-    {
-        vIndexOffsets = C({0, 1, 2, 3, 4, 5, 6, 7});
-    }
-    else
-    {
-        // only SIMD8 and SIMD16 offset vectors are provided
-        SWR_ASSERT(mVWidth == 16);
-        vIndexOffsets = C({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
-    }
-
-    // compare index count to the max valid index
-    // e.g vMaxIndex      4 4 4 4 4 4 4 4 : 4 indices left to load
-    //     vIndexOffsets  0 1 2 3 4 5 6 7
-    //     ------------------------------
-    //     vIndexMask    -1-1-1-1 0 0 0 0 : offsets < max pass
-    //     vLoadedIndices 0 1 2 3 0 0 0 0 : offsets >= max masked to 0
-    Value* vMaxIndex  = VBROADCAST(numIndicesLeft);
-    Value* vIndexMask = ICMP_SGT(vMaxIndex, vIndexOffsets);
-
-    // Load the indices; OOB loads 0
-    return MASKED_LOAD(pIndices,
-                       4,
-                       vIndexMask,
-                       VIMMED1(0),
-                       "vIndices",
-                       PointerType::get(mSimdInt32Ty, 0),
-                       MEM_CLIENT::GFX_MEM_CLIENT_FETCH);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Takes a SIMD16 of gathered 8bpc verts, zero or sign extends,
-/// denormalizes if needed, converts to F32 if needed, and positions in
-/// the proper SIMD rows to be output to the simdvertex structure.
-/// The gather result is processed as two halves (EXTRACT_16) that are
-/// joined again (JOIN_16) after the byte shuffle.
-/// @param args: (tuple of args, listed below)
-/// @param vGatherResult - gathered 8bpc vertices
-/// @param pVtxOut - base pointer to output simdvertex struct
-/// @param extendType - sign extend or zero extend
-/// @param conversionType - conversion applied after extension (e.g. none, normalized, scaled)
-/// @param currentVertexElement - reference to the current vVertexElement
-/// @param outputElt - reference to the current offset from simdvertex we're outputting to
-/// @param compMask - component packing mask
-/// @param compCtrl - component control val
-/// @param vVertexElements[4] - vertex components to output
-/// @param swizzle[4] - component swizzle location
-void FetchJit::Shuffle8bpcGatherd16(Shuffle8bpcArgs& args)
-{
-    // Unpack tuple args
-    Value*&                      vGatherResult        = std::get<0>(args);
-    Value*                       pVtxOut              = std::get<1>(args);
-    const Instruction::CastOps   extendType           = std::get<2>(args);
-    const ConversionType         conversionType       = std::get<3>(args);
-    uint32_t&                    currentVertexElement = std::get<4>(args);
-    uint32_t&                    outputElt            = std::get<5>(args);
-    const ComponentEnable        compMask             = std::get<6>(args);
-    const ComponentControl(&compCtrl)[4]              = std::get<7>(args);
-    Value*(&vVertexElements)[4]                       = std::get<8>(args);
-    const uint32_t(&swizzle)[4]                       = std::get<9>(args);
-
-    // cast types
-    Type* vGatherTy = getVectorType(mInt32Ty, 8);
-    Type* v32x8Ty   = getVectorType(mInt8Ty, 32);
-
-    // have to do extra work for sign extending
-    if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP))
-    {
-        Type* v16x8Ty = getVectorType(mInt8Ty, 16); // 8x16bit ints in a 128bit lane
-        Type* v128Ty  = getVectorType(IntegerType::getIntNTy(JM()->mContext, 128), 2);
-
-        // shuffle mask, including any swizzling
-        const char x          = (char)swizzle[0];
-        const char y          = (char)swizzle[1];
-        const char z          = (char)swizzle[2];
-        const char w          = (char)swizzle[3];
-        Value*     vConstMask = C<char>(
-            {char(x),     char(x + 4),  char(x + 8), char(x + 12), char(y),     char(y + 4),
-             char(y + 8), char(y + 12), char(z),     char(z + 4),  char(z + 8), char(z + 12),
-             char(w),     char(w + 4),  char(w + 8), char(w + 12), char(x),     char(x + 4),
-             char(x + 8), char(x + 12), char(y),     char(y + 4),  char(y + 8), char(y + 12),
-             char(z),     char(z + 4),  char(z + 8), char(z + 12), char(w),     char(w + 4),
-             char(w + 8), char(w + 12)});
-
-        // SIMD16 PSHUFB isn't part of AVX-512F, so split into SIMD8 for the sake of KNL, for now..
-
-        Value* vGatherResult_lo = EXTRACT_16(vGatherResult, 0);
-        Value* vGatherResult_hi = EXTRACT_16(vGatherResult, 1);
-
-        Value* vShufResult_lo =
-            BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
-        Value* vShufResult_hi =
-            BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
-
-        // after pshufb: group components together in each 128bit lane
-        // 256i - 0    1    2    3    4    5    6    7
-        //        xxxx yyyy zzzz wwww xxxx yyyy zzzz wwww
-
-        Value* vi128XY_lo = nullptr;
-        Value* vi128XY_hi = nullptr;
-        if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
-        {
-            vi128XY_lo = BITCAST(
-                VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({0, 4, 0, 0, 1, 5, 0, 0})),
-                v128Ty);
-            vi128XY_hi = BITCAST(
-                VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({0, 4, 0, 0, 1, 5, 0, 0})),
-                v128Ty);
-
-            // after PERMD: move and pack xy and zw components in low 64 bits of each 128bit lane
-            // 256i - 0    1    2    3    4    5    6    7
-            //        xxxx xxxx dcdc dcdc yyyy yyyy dcdc dcdc (dc - don't care)
-        }
-
-        // do the same for zw components
-        Value* vi128ZW_lo = nullptr;
-        Value* vi128ZW_hi = nullptr;
-        if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
-        {
-            vi128ZW_lo = BITCAST(
-                VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({2, 6, 0, 0, 3, 7, 0, 0})),
-                v128Ty);
-            vi128ZW_hi = BITCAST(
-                VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({2, 6, 0, 0, 3, 7, 0, 0})),
-                v128Ty);
-        }
-
-        // init denormalize variables if needed
-        // Both start from a known value so that an unexpected conversionType can never lead
-        // to a read of an indeterminate cast opcode further down.
-        Instruction::CastOps fpCast           = Instruction::CastOps::CastOpsEnd;
-        Value*               conversionFactor = nullptr;
-
-        switch (conversionType)
-        {
-        case CONVERT_NORMALIZED:
-            fpCast           = Instruction::CastOps::SIToFP;
-            conversionFactor = VIMMED1((float)(1.0 / 127.0));
-            break;
-        case CONVERT_SSCALED:
-            fpCast           = Instruction::CastOps::SIToFP;
-            conversionFactor = VIMMED1((float)(1.0));
-            break;
-        case CONVERT_USCALED:
-            assert(false && "Type should not be sign extended!");
-            conversionFactor = nullptr;
-            break;
-        default:
-            assert(conversionType == CONVERT_NONE);
-            conversionFactor = nullptr;
-            break;
-        }
-
-        // sign extend all enabled components. If we have a full vVertexElements, output to current
-        // simdvertex
-        for (uint32_t i = 0; i < 4; i++)
-        {
-            if (isComponentEnabled(compMask, i))
-            {
-                if (compCtrl[i] == ComponentControl::StoreSrc)
-                {
-                    // if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
-                    uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
-                    // if x or y, use vi128XY permute result, else use vi128ZW
-                    Value* selectedPermute_lo = (i < 2) ? vi128XY_lo : vi128ZW_lo;
-                    Value* selectedPermute_hi = (i < 2) ? vi128XY_hi : vi128ZW_hi;
-
-                    // sign extend
-                    Value* temp_lo =
-                        PMOVSXBD(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v16x8Ty));
-                    Value* temp_hi =
-                        PMOVSXBD(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v16x8Ty));
-
-                    Value* temp = JOIN_16(temp_lo, temp_hi);
-
-                    // denormalize if needed; a factor exists only for the conversions set up
-                    // above, so an unsupported type leaves the extended integers untouched
-                    if (conversionFactor != nullptr)
-                    {
-                        temp = FMUL(CAST(fpCast, temp, mSimdFP32Ty), conversionFactor);
-                    }
-
-                    vVertexElements[currentVertexElement] = temp;
-
-                    currentVertexElement += 1;
-                }
-                else
-                {
-                    vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
-                }
-
-                if (currentVertexElement > 3)
-                {
-                    StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
-                    // reset to the next vVertexElement to output
-                    currentVertexElement = 0;
-                }
-            }
-        }
-    }
-    // else zero extend
-    else if ((extendType == Instruction::CastOps::ZExt) ||
-             (extendType == Instruction::CastOps::UIToFP))
-    {
-        // init denormalize variables if needed
-        // Same defensive initialisation as in the sign extending path.
-        Instruction::CastOps fpCast           = Instruction::CastOps::CastOpsEnd;
-        Value*               conversionFactor = nullptr;
-
-        switch (conversionType)
-        {
-        case CONVERT_NORMALIZED:
-            fpCast           = Instruction::CastOps::UIToFP;
-            conversionFactor = VIMMED1((float)(1.0 / 255.0));
-            break;
-        case CONVERT_USCALED:
-            fpCast           = Instruction::CastOps::UIToFP;
-            conversionFactor = VIMMED1((float)(1.0));
-            break;
-        case CONVERT_SSCALED:
-            assert(false && "Type should not be zero extended!");
-            conversionFactor = nullptr;
-            break;
-        default:
-            assert(conversionType == CONVERT_NONE);
-            conversionFactor = nullptr;
-            break;
-        }
-
-        // shuffle enabled components into lower byte of each 32bit lane, 0 extending to 32 bits
-        for (uint32_t i = 0; i < 4; i++)
-        {
-            if (isComponentEnabled(compMask, i))
-            {
-                if (compCtrl[i] == ComponentControl::StoreSrc)
-                {
-                    // pshufb masks for each component
-                    Value* vConstMask;
-                    switch (swizzle[i])
-                    {
-                    case 0:
-                        // x shuffle mask
-                        vConstMask =
-                            C<char>({0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
-                                     0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1});
-                        break;
-                    case 1:
-                        // y shuffle mask
-                        vConstMask =
-                            C<char>({1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
-                                     1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1});
-                        break;
-                    case 2:
-                        // z shuffle mask
-                        vConstMask =
-                            C<char>({2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
-                                     2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1});
-                        break;
-                    case 3:
-                        // w shuffle mask
-                        vConstMask =
-                            C<char>({3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
-                                     3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1});
-                        break;
-                    default:
-                        assert(false && "Invalid component");
-                        vConstMask = nullptr;
-                        break;
-                    }
-
-                    Value* vGatherResult_lo = EXTRACT_16(vGatherResult, 0);
-                    Value* vGatherResult_hi = EXTRACT_16(vGatherResult, 1);
-
-                    Value* temp_lo =
-                        BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
-                    Value* temp_hi =
-                        BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
-
-                    // after pshufb for x channel
-                    // 256i - 0    1    2    3    4    5    6    7
-                    //        x000 x000 x000 x000 x000 x000 x000 x000
-
-                    Value* temp = JOIN_16(temp_lo, temp_hi);
-
-                    // denormalize if needed; skipped when no factor was set up above
-                    if (conversionFactor != nullptr)
-                    {
-                        temp = FMUL(CAST(fpCast, temp, mSimdFP32Ty), conversionFactor);
-                    }
-
-                    vVertexElements[currentVertexElement] = temp;
-
-                    currentVertexElement += 1;
-                }
-                else
-                {
-                    vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
-                }
-
-                if (currentVertexElement > 3)
-                {
-                    StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
-                    // reset to the next vVertexElement to output
-                    currentVertexElement = 0;
-                }
-            }
-        }
-    }
-    else
-    {
-        SWR_INVALID("Unsupported conversion type");
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Walks the four components of a gathered 8bpc vertex. For every
-/// enabled component it either picks that component's bytes out of the
-/// gather result (honouring the swizzle) and extends/converts them, or
-/// emits the constant selected by the component control. Each result is
-/// appended to vVertexElements, which is stored out whenever it holds
-/// four entries.
-/// @param args: tuple of (vGatherResult, pVtxOut, extendType, conversionType,
-/// currentVertexElement, outputElt, compMask, compCtrl, vVertexElements,
-/// swizzle), unpacked in that order
-void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs& args)
-{
-    // Unpack tuple args
-    Value*&                      vGatherResult        = std::get<0>(args);
-    Value*                       pVtxOut              = std::get<1>(args);
-    const Instruction::CastOps   extendType           = std::get<2>(args);
-    const ConversionType         conversionType       = std::get<3>(args);
-    uint32_t&                    currentVertexElement = std::get<4>(args);
-    uint32_t&                    outputElt            = std::get<5>(args);
-    const ComponentEnable        compMask             = std::get<6>(args);
-    const ComponentControl(&compCtrl)[4]              = std::get<7>(args);
-    Value*(&vVertexElements)[4]                       = std::get<8>(args);
-    const uint32_t(&swizzle)[4]                       = std::get<9>(args);
-
-#if LLVM_VERSION_MAJOR >= 11
-    using MaskType = int32_t;
-#else
-    using MaskType = uint32_t;
-#endif
-
-    // the gather result viewed as bytes; vwidth is units of 32 bits
-    Type* vByteTy = getVectorType(mInt8Ty, mVWidth * 4);
-
-    // which extension family was requested
-    const bool bSignExtend = (extendType == Instruction::CastOps::SExt) ||
-                             (extendType == Instruction::CastOps::SIToFP);
-    const bool bZeroExtend = (extendType == Instruction::CastOps::ZExt) ||
-                             (extendType == Instruction::CastOps::UIToFP);
-
-    // byte positions of each component: every 4th byte starting at the component's offset
-    std::vector<MaskType> vByteSelect[4] = {
-        {0, 4, 8, 12, 16, 20, 24, 28},  // x
-        {1, 5, 9, 13, 17, 21, 25, 29},  // y
-        {2, 6, 10, 14, 18, 22, 26, 30}, // z
-        {3, 7, 11, 15, 19, 23, 27, 31}, // w
-    };
-
-    for (uint32_t comp = 0; comp < 4; ++comp)
-    {
-        if (isComponentEnabled(compMask, comp))
-        {
-            Value* pElement = nullptr;
-
-            if (compCtrl[comp] != ComponentControl::StoreSrc)
-            {
-                // component comes from the control value, not from memory
-                pElement = GenerateCompCtrlVector(compCtrl[comp]);
-            }
-            else
-            {
-                Value* pBytes = BITCAST(vGatherResult, vByteTy);
-                pElement = VSHUFFLE(pBytes, UndefValue::get(vByteTy), vByteSelect[swizzle[comp]]);
-
-                if (bSignExtend)
-                {
-                    if (conversionType == CONVERT_NORMALIZED)
-                    {
-                        Value* pFloat = SI_TO_FP(pElement, mSimdFP32Ty);
-                        pElement      = FMUL(pFloat, VIMMED1((float)(1.0 / 127.0)));
-                    }
-                    else if (conversionType == CONVERT_SSCALED)
-                    {
-                        pElement = SI_TO_FP(pElement, mSimdFP32Ty);
-                    }
-                    else if (conversionType == CONVERT_USCALED)
-                    {
-                        SWR_INVALID("Type should not be sign extended!");
-                    }
-                    else
-                    {
-                        SWR_ASSERT(conversionType == CONVERT_NONE);
-                        pElement = S_EXT(pElement, mSimdInt32Ty);
-                    }
-                }
-                else if (bZeroExtend)
-                {
-                    if (conversionType == CONVERT_NORMALIZED)
-                    {
-                        Value* pFloat = UI_TO_FP(pElement, mSimdFP32Ty);
-                        pElement      = FMUL(pFloat, VIMMED1((float)(1.0 / 255.0)));
-                    }
-                    else if (conversionType == CONVERT_SSCALED)
-                    {
-                        SWR_INVALID("Type should not be zero extended!");
-                    }
-                    else if (conversionType == CONVERT_USCALED)
-                    {
-                        pElement = UI_TO_FP(pElement, mSimdFP32Ty);
-                    }
-                    else
-                    {
-                        SWR_ASSERT(conversionType == CONVERT_NONE);
-                        pElement = Z_EXT(pElement, mSimdInt32Ty);
-                    }
-                }
-                else
-                {
-                    SWR_INVALID("Unsupported conversion type");
-                }
-            }
-
-            vVertexElements[currentVertexElement++] = pElement;
-
-            // a full set of four elements is written out and the slot counter restarts
-            if (currentVertexElement > 3)
-            {
-                StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
-                currentVertexElement = 0;
-            }
-        }
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Takes a SIMD of gathered 16bpc verts, zero or sign extends,
-/// denormalizes if needed, converts to F32 if needed, and positions in
-// the proper SIMD rows to be output to the simdvertex structure
-/// @param args: (tuple of args, listed below)
-/// @param vGatherResult[2] - array of gathered 16bpc vertices, 4 per index
-/// @param pVtxOut - base pointer to output simdvertex struct
-/// @param extendType - sign extend or zero extend
-/// @param conversionType - conversion to apply after extension (e.g. none, normalized, scaled)
-/// @param currentVertexElement - reference to the current vVertexElement
-/// @param outputElt - reference to the current offset from simdvertex we're outputting to
-/// @param compMask - component packing mask
-/// @param compCtrl - component control val
-/// @param vVertexElements[4] - vertex components to output
-void FetchJit::Shuffle16bpcGather16(Shuffle16bpcArgs& args)
-{
- // Unpack tuple args
- Value*(&vGatherResult)[2] = std::get<0>(args);
- Value* pVtxOut = std::get<1>(args);
- const Instruction::CastOps extendType = std::get<2>(args);
- const ConversionType conversionType = std::get<3>(args);
- uint32_t& currentVertexElement = std::get<4>(args);
- uint32_t& outputElt = std::get<5>(args);
- const ComponentEnable compMask = std::get<6>(args);
- const ComponentControl(&compCtrl)[4] = std::get<7>(args);
- Value*(&vVertexElements)[4] = std::get<8>(args);
-
- // cast types
- Type* vGatherTy = getVectorType(mInt32Ty, 8);
- Type* v32x8Ty = getVectorType(mInt8Ty, 32);
-
- // have to do extra work for sign extending
- if ((extendType == Instruction::CastOps::SExt) ||
- (extendType == Instruction::CastOps::SIToFP) || (extendType == Instruction::CastOps::FPExt))
- {
- // is this PP float?
- bool bFP = (extendType == Instruction::CastOps::FPExt) ? true : false;
-
- Type* v8x16Ty = getVectorType(mInt16Ty, 8); // 8x16bit in a 128bit lane
- Type* v128bitTy = getVectorType(IntegerType::getIntNTy(JM()->mContext, 128), 2);
-
- // shuffle mask
- Value* vConstMask = C<uint8_t>({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
- 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15});
- Value* vi128XY_lo = nullptr;
- Value* vi128XY_hi = nullptr;
- if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
- {
- // SIMD16 PSHUFB isn't part of AVX-512F, so split into SIMD8 for the sake of KNL, for
- // now..
-
- Value* vGatherResult_lo = BITCAST(EXTRACT_16(vGatherResult[0], 0), v32x8Ty);
- Value* vGatherResult_hi = BITCAST(EXTRACT_16(vGatherResult[0], 1), v32x8Ty);
-
- Value* vShufResult_lo = BITCAST(PSHUFB(vGatherResult_lo, vConstMask), vGatherTy);
- Value* vShufResult_hi = BITCAST(PSHUFB(vGatherResult_hi, vConstMask), vGatherTy);
-
- // after pshufb: group components together in each 128bit lane
- // 256i - 0 1 2 3 4 5 6 7
- // xxxx xxxx yyyy yyyy xxxx xxxx yyyy yyyy
-
- vi128XY_lo = BITCAST(
- VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})),
- v128bitTy);
- vi128XY_hi = BITCAST(
- VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})),
- v128bitTy);
-
- // after PERMD: move and pack xy components into each 128bit lane
- // 256i - 0 1 2 3 4 5 6 7
- // xxxx xxxx xxxx xxxx yyyy yyyy yyyy yyyy
- }
-
- // do the same for zw components
- Value* vi128ZW_lo = nullptr;
- Value* vi128ZW_hi = nullptr;
- if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
- {
- Value* vGatherResult_lo = BITCAST(EXTRACT_16(vGatherResult[1], 0), v32x8Ty);
- Value* vGatherResult_hi = BITCAST(EXTRACT_16(vGatherResult[1], 1), v32x8Ty);
-
- Value* vShufResult_lo = BITCAST(PSHUFB(vGatherResult_lo, vConstMask), vGatherTy);
- Value* vShufResult_hi = BITCAST(PSHUFB(vGatherResult_hi, vConstMask), vGatherTy);
-
- vi128ZW_lo = BITCAST(
- VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})),
- v128bitTy);
- vi128ZW_hi = BITCAST(
- VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})),
- v128bitTy);
- }
-
- // init denormalize variables if needed
- Instruction::CastOps IntToFpCast;
- Value* conversionFactor;
-
- switch (conversionType)
- {
- case CONVERT_NORMALIZED:
- IntToFpCast = Instruction::CastOps::SIToFP;
- conversionFactor = VIMMED1((float)(1.0 / 32767.0));
- break;
- case CONVERT_SSCALED:
- IntToFpCast = Instruction::CastOps::SIToFP;
- conversionFactor = VIMMED1((float)(1.0));
- break;
- case CONVERT_USCALED:
- assert(false && "Type should not be sign extended!");
- conversionFactor = nullptr;
- break;
- default:
- assert(conversionType == CONVERT_NONE);
- conversionFactor = nullptr;
- break;
- }
-
- // sign extend all enabled components. If we have a fill vVertexElements, output to current
- // simdvertex
- for (uint32_t i = 0; i < 4; i++)
- {
- if (isComponentEnabled(compMask, i))
- {
- if (compCtrl[i] == ComponentControl::StoreSrc)
- {
- // if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
- uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
- // if x or y, use vi128XY permute result, else use vi128ZW
- Value* selectedPermute_lo = (i < 2) ? vi128XY_lo : vi128ZW_lo;
- Value* selectedPermute_hi = (i < 2) ? vi128XY_hi : vi128ZW_hi;
-
- if (bFP)
- {
- // extract 128 bit lanes to sign extend each component
- Value* temp_lo =
- CVTPH2PS(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v8x16Ty));
- Value* temp_hi =
- CVTPH2PS(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v8x16Ty));
-
- vVertexElements[currentVertexElement] = JOIN_16(temp_lo, temp_hi);
- }
- else
- {
- // extract 128 bit lanes to sign extend each component
- Value* temp_lo =
- PMOVSXWD(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v8x16Ty));
- Value* temp_hi =
- PMOVSXWD(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v8x16Ty));
-
- Value* temp = JOIN_16(temp_lo, temp_hi);
-
- // denormalize if needed
- if (conversionType != CONVERT_NONE)
- {
- temp = FMUL(CAST(IntToFpCast, temp, mSimdFP32Ty), conversionFactor);
- }
-
- vVertexElements[currentVertexElement] = temp;
- }
-
- currentVertexElement += 1;
- }
- else
- {
- vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
- }
-
- if (currentVertexElement > 3)
- {
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
- }
- }
- }
- }
- // else zero extend
- else if ((extendType == Instruction::CastOps::ZExt) ||
- (extendType == Instruction::CastOps::UIToFP))
- {
- // pshufb masks for each component
- Value* vConstMask[2];
-
- if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 2))
- {
- // x/z shuffle mask
- vConstMask[0] = C<char>({
- 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
- 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
- });
- }
-
- if (isComponentEnabled(compMask, 1) || isComponentEnabled(compMask, 3))
- {
- // y/w shuffle mask
- vConstMask[1] = C<char>({2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
- 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1});
- }
-
- // init denormalize variables if needed
- Instruction::CastOps fpCast;
- Value* conversionFactor;
-
- switch (conversionType)
- {
- case CONVERT_NORMALIZED:
- fpCast = Instruction::CastOps::UIToFP;
- conversionFactor = VIMMED1((float)(1.0 / 65535.0));
- break;
- case CONVERT_USCALED:
- fpCast = Instruction::CastOps::UIToFP;
- conversionFactor = VIMMED1((float)(1.0f));
- break;
- case CONVERT_SSCALED:
- SWR_INVALID("Type should not be zero extended!");
- conversionFactor = nullptr;
- break;
- default:
- SWR_ASSERT(conversionType == CONVERT_NONE);
- conversionFactor = nullptr;
- break;
- }
-
- // shuffle enabled components into lower word of each 32bit lane, 0 extending to 32 bits
- for (uint32_t i = 0; i < 4; i++)
- {
- if (isComponentEnabled(compMask, i))
- {
- if (compCtrl[i] == ComponentControl::StoreSrc)
- {
- // select correct constMask for x/z or y/w pshufb
- uint32_t selectedMask = ((i == 0) || (i == 2)) ? 0 : 1;
- // if x or y, use vi128XY permute result, else use vi128ZW
- uint32_t selectedGather = (i < 2) ? 0 : 1;
-
- // SIMD16 PSHUFB isn't part of AVX-512F, so split into SIMD8 for the sake of KNL,
- // for now..
-
- Value* vGatherResult_lo = EXTRACT_16(vGatherResult[selectedGather], 0);
- Value* vGatherResult_hi = EXTRACT_16(vGatherResult[selectedGather], 1);
-
- Value* temp_lo = BITCAST(
- PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask[selectedMask]),
- vGatherTy);
- Value* temp_hi = BITCAST(
- PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask[selectedMask]),
- vGatherTy);
-
- // after pshufb mask for x channel; z uses the same shuffle from the second
- // gather 256i - 0 1 2 3 4 5 6 7
- // xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
-
- Value* temp = JOIN_16(temp_lo, temp_hi);
-
- // denormalize if needed
- if (conversionType != CONVERT_NONE)
- {
- temp = FMUL(CAST(fpCast, temp, mSimdFP32Ty), conversionFactor);
- }
-
- vVertexElements[currentVertexElement] = temp;
-
- currentVertexElement += 1;
- }
- else
- {
- vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
- }
-
- if (currentVertexElement > 3)
- {
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
- }
- }
- }
- }
- else
- {
- SWR_INVALID("Unsupported conversion type");
- }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Expands gathered 16-bit-per-component data to 32 bits per
-///        component (sign extend, zero extend or half->float), optionally
-///        denormalizes it, and stores completed groups of four rows through
-///        StoreVertexElements.
-/// @param args - tuple of: gather results [xy, zw], pVtxOut, extend type,
-///               conversion type, current vertex element (in/out), output
-///               element (in/out), component enable mask, per-component
-///               controls, and the vVertexElements row array (in/out).
-void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs& args)
-{
-    // Unpack tuple args
-    Value*(&vGatherResult)[2]                    = std::get<0>(args);
-    Value*                      pVtxOut          = std::get<1>(args);
-    const Instruction::CastOps  extendType       = std::get<2>(args);
-    const ConversionType        conversionType   = std::get<3>(args);
-    uint32_t&                   currentVertexElement = std::get<4>(args);
-    uint32_t&                   outputElt        = std::get<5>(args);
-    const ComponentEnable       compMask         = std::get<6>(args);
-    const ComponentControl(&compCtrl)[4]         = std::get<7>(args);
-    Value*(&vVertexElements)[4]                  = std::get<8>(args);
-
-    // cast types
-    Type* vGatherTy = getVectorType(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
-    Type* v32x8Ty   = getVectorType(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
-
-    // Appends one finished row; once four rows are pending they are written
-    // out and the row cursor is reset.
-    auto emitElement = [&](Value* vElement) {
-        vVertexElements[currentVertexElement++] = vElement;
-        if (currentVertexElement > 3)
-        {
-            StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
-            // reset to the next vVertexElement to output
-            currentVertexElement = 0;
-        }
-    };
-
-    const bool bSignExtend = (extendType == Instruction::CastOps::SExt) ||
-                             (extendType == Instruction::CastOps::SIToFP) ||
-                             (extendType == Instruction::CastOps::FPExt);
-    const bool bZeroExtend = (extendType == Instruction::CastOps::ZExt) ||
-                             (extendType == Instruction::CastOps::UIToFP);
-
-    // have to do extra work for sign extending
-    if (bSignExtend)
-    {
-        // is this PP float?
-        const bool bFP = (extendType == Instruction::CastOps::FPExt);
-
-        Type* v8x16Ty   = getVectorType(mInt16Ty, 8); // 8x16bit in a 128bit lane
-        Type* v128bitTy = getVectorType(IntegerType::getIntNTy(JM()->mContext, 128),
-                                        mVWidth / 4); // vwidth is units of 32 bits
-
-        // shuffle mask
-        Value* vConstMask = C<char>({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
-                                     0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15});
-
-        Value* vi128XY = nullptr;
-        if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
-        {
-            Value* vShufResult =
-                BITCAST(PSHUFB(BITCAST(vGatherResult[0], v32x8Ty), vConstMask), vGatherTy);
-            // after pshufb: group components together in each 128bit lane
-            // 256i - 0    1    2    3    4    5    6    7
-            //        xxxx xxxx yyyy yyyy xxxx xxxx yyyy yyyy
-
-            vi128XY = BITCAST(VPERMD(vShufResult, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})), v128bitTy);
-            // after PERMD: move and pack xy components into each 128bit lane
-            // 256i - 0    1    2    3    4    5    6    7
-            //        xxxx xxxx xxxx xxxx yyyy yyyy yyyy yyyy
-        }
-
-        // do the same for zw components
-        Value* vi128ZW = nullptr;
-        if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
-        {
-            Value* vShufResult =
-                BITCAST(PSHUFB(BITCAST(vGatherResult[1], v32x8Ty), vConstMask), vGatherTy);
-            vi128ZW = BITCAST(VPERMD(vShufResult, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})), v128bitTy);
-        }
-
-        // init denormalize variables if needed. Both valid conversions use a
-        // signed int->fp cast; conversionFactor stays null when no
-        // denormalization is to be emitted (including the invalid cases, which
-        // previously fed an indeterminate opcode and a null factor to CAST/FMUL).
-        const Instruction::CastOps IntToFpCast      = Instruction::CastOps::SIToFP;
-        Value*                     conversionFactor = nullptr;
-
-        switch (conversionType)
-        {
-        case CONVERT_NORMALIZED:
-            conversionFactor = VIMMED1((float)(1.0 / 32767.0));
-            break;
-        case CONVERT_SSCALED:
-            conversionFactor = VIMMED1((float)(1.0));
-            break;
-        case CONVERT_USCALED:
-            SWR_INVALID("Type should not be sign extended!");
-            break;
-        default:
-            SWR_ASSERT(conversionType == CONVERT_NONE);
-            break;
-        }
-
-        // sign extend all enabled components. If we have a full vVertexElements, output to
-        // current simdvertex
-        for (uint32_t i = 0; i < 4; i++)
-        {
-            if (!isComponentEnabled(compMask, i))
-            {
-                continue;
-            }
-
-            if (compCtrl[i] != ComponentControl::StoreSrc)
-            {
-                emitElement(GenerateCompCtrlVector(compCtrl[i]));
-                continue;
-            }
-
-            // if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
-            const uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
-            // if x or y, use vi128XY permute result, else use vi128ZW
-            Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
-
-            // extract 128 bit lanes to extend each component
-            Value* vLane    = BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty);
-            Value* vElement = nullptr;
-
-            if (bFP)
-            {
-                vElement = CVTPH2PS(vLane);
-            }
-            else
-            {
-                vElement = PMOVSXWD(vLane);
-
-                // denormalize if needed
-                if (conversionFactor != nullptr)
-                {
-                    vElement = FMUL(CAST(IntToFpCast, vElement, mSimdFP32Ty), conversionFactor);
-                }
-            }
-
-            emitElement(vElement);
-        }
-    }
-    // else zero extend
-    else if (bZeroExtend)
-    {
-        // pshufb masks for each component; only the masks that are needed are built
-        Value* vConstMask[2] = {nullptr, nullptr};
-
-        if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 2))
-        {
-            // x/z shuffle mask
-            vConstMask[0] = C<char>({
-                0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
-                0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
-            });
-        }
-
-        if (isComponentEnabled(compMask, 1) || isComponentEnabled(compMask, 3))
-        {
-            // y/w shuffle mask
-            vConstMask[1] = C<char>({2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
-                                     2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1});
-        }
-
-        // init denormalize variables if needed (see the sign-extend branch for
-        // why conversionFactor doubles as the "denormalize" flag)
-        const Instruction::CastOps fpCast           = Instruction::CastOps::UIToFP;
-        Value*                     conversionFactor = nullptr;
-
-        switch (conversionType)
-        {
-        case CONVERT_NORMALIZED:
-            conversionFactor = VIMMED1((float)(1.0 / 65535.0));
-            break;
-        case CONVERT_USCALED:
-            conversionFactor = VIMMED1((float)(1.0f));
-            break;
-        case CONVERT_SSCALED:
-            SWR_INVALID("Type should not be zero extended!");
-            break;
-        default:
-            SWR_ASSERT(conversionType == CONVERT_NONE);
-            break;
-        }
-
-        // shuffle enabled components into lower word of each 32bit lane, 0 extending to 32 bits
-        for (uint32_t i = 0; i < 4; i++)
-        {
-            if (!isComponentEnabled(compMask, i))
-            {
-                continue;
-            }
-
-            if (compCtrl[i] != ComponentControl::StoreSrc)
-            {
-                emitElement(GenerateCompCtrlVector(compCtrl[i]));
-                continue;
-            }
-
-            // select correct constMask for x/z or y/w pshufb
-            const uint32_t selectedMask = ((i == 0) || (i == 2)) ? 0 : 1;
-            // if x or y, use the first gather result, else use the second
-            const uint32_t selectedGather = (i < 2) ? 0 : 1;
-
-            Value* vElement = BITCAST(PSHUFB(BITCAST(vGatherResult[selectedGather], v32x8Ty),
-                                             vConstMask[selectedMask]),
-                                      vGatherTy);
-            // after pshufb mask for x channel; z uses the same shuffle from the second
-            // gather 256i - 0    1    2    3    4    5    6    7
-            //               xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
-
-            // denormalize if needed
-            if (conversionFactor != nullptr)
-            {
-                vElement = FMUL(CAST(fpCast, vElement, mSimdFP32Ty), conversionFactor);
-            }
-
-            emitElement(vElement);
-        }
-    }
-    else
-    {
-        SWR_INVALID("Unsupported conversion type");
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Writes up to four rows of vVertexElements to the output vertex
-///        at simdvertex slot outputElt.
-/// @param pVtxOut - base address of VIN output struct
-/// @param outputElt - simdvertex offset in VIN to write to
-/// @param numEltsToStore - number of simdvertex rows to write out (<= 4)
-/// @param vVertexElements - LLVM Value*[] simdvertex to write out; rows that
-///                          are not FP32 are replaced by their FP32 bitcast
-void FetchJit::StoreVertexElements(Value*         pVtxOut,
-                                   const uint32_t outputElt,
-                                   const uint32_t numEltsToStore,
-                                   Value* (&vVertexElements)[4])
-{
-    SWR_ASSERT(numEltsToStore <= 4, "Invalid element count.");
-
-    // outputElt * 4 = offsetting by the size of a simdvertex
-    const uint32_t firstRow = outputElt * 4;
-
-    for (uint32_t row = 0; row < numEltsToStore; ++row)
-    {
-        Value*&    vRow     = vVertexElements[row];
-        const bool bIsFloat = vRow->getType()->getScalarType()->isFloatTy();
-
-#if FETCH_DUMP_VERTEX
-        if (bIsFloat)
-        {
-            PRINT("vVertexElements[%d]: %f\n", {C(row), vRow});
-        }
-        else
-        {
-            PRINT("vVertexElements[%d]: 0x%x\n", {C(row), vRow});
-        }
-#endif
-        // STORE expects FP32 x vWidth type, just bitcast if needed
-        if (!bIsFloat)
-        {
-            vRow = BITCAST(vRow, mSimdFP32Ty);
-        }
-
-        // + row offsets to a 32bit x vWidth row within the current vertex
-        Value* pDest = GEP(pVtxOut, C(firstRow + row), nullptr, "destGEP");
-        STORE(vRow, pDest);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Builds the vector that a non-source ComponentControl value
-///        stands for (undef, constant 0/1, vertex id or instance id).
-/// @param ctrl - ComponentControl value
-/// @return the generated vector; StoreSrc and unknown values are reported
-///         through SWR_INVALID and yield an undefined vector.
-Value* FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl)
-{
-    Value* vResult = nullptr;
-
-    switch (ctrl)
-    {
-    case NoStore:
-        vResult = VUNDEF_I();
-        break;
-
-    case Store0:
-        vResult = VIMMED1(0);
-        break;
-
-    case Store1Fp:
-        vResult = VIMMED1(1.0f);
-        break;
-
-    case Store1Int:
-        vResult = VIMMED1(1);
-        break;
-
-    case StoreVertexId:
-        if (mVWidth == 16)
-        {
-            // The vertex ids are read from two context fields and joined
-            // into one 16-wide vector.
-            Type*  pHalfFPTy = getVectorType(mFP32Ty, 8);
-            Value* vLowerIds =
-                BITCAST(LOAD(GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID})), pHalfFPTy);
-            Value* vUpperIds =
-                BITCAST(LOAD(GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID2})), pHalfFPTy);
-            vResult = JOIN_16(vLowerIds, vUpperIds);
-        }
-        else
-        {
-            vResult = BITCAST(LOAD(GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID})), mSimdFP32Ty);
-        }
-        break;
-
-    case StoreInstanceId:
-    {
-        // Scalar instance id, reinterpreted as FP32 and splatted across the vector.
-        Value* pInstance =
-            BITCAST(LOAD(GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CurInstance})), mFP32Ty);
-        vResult = VBROADCAST(pInstance);
-        break;
-    }
-
-    case StoreSrc:
-    default:
-        SWR_INVALID("Invalid component control");
-        vResult = VUNDEF_I();
-        break;
-    }
-
-    return vResult;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Tells whether a component (0 = X, 1 = Y, 2 = Z, 3 = W) is set
-///        in the enable mask.
-/// @param enableMask - enable bits
-/// @param component - component to check if enabled.
-/// @return true when the component's bit is set; false otherwise,
-///         including for component indices above 3.
-bool isComponentEnabled(ComponentEnable enableMask, uint8_t component)
-{
-    // Enable bit for each component index, in X, Y, Z, W order.
-    static const ComponentEnable kComponentBit[4] = {
-        ComponentEnable::X,
-        ComponentEnable::Y,
-        ComponentEnable::Z,
-        ComponentEnable::W,
-    };
-
-    if (component > 3)
-    {
-        return false;
-    }
-
-    return (enableMask & kComponentBit[component]) != 0;
-}
-
-// Don't want two threads compiling the same fetch shader simultaneously
-// Has problems in the JIT cache implementation
-// This is only a problem for fetch right now.
-static std::mutex gFetchCodegenMutex;
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief JITs from fetch shader IR
-/// @param hJitMgr - JitManager handle
-/// @param hFunc - LLVM function IR (a const llvm::Function* passed as a handle)
-/// @return PFN_FETCH_FUNC - pointer to fetch code
-PFN_FETCH_FUNC JitFetchFunc(HANDLE hJitMgr, const HANDLE hFunc)
-{
-    const llvm::Function* func    = (const llvm::Function*)hFunc;
-    JitManager*           pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
-
-    // Held until return so the lock is released on every exit path.
-    std::lock_guard<std::mutex> lock(gFetchCodegenMutex);
-
-    // StringRef is not guaranteed to be NUL-terminated, so take an owning copy once.
-    const std::string funcName = func->getName().str();
-
-    PFN_FETCH_FUNC pfnFetch = (PFN_FETCH_FUNC)(pJitMgr->mpExec->getFunctionAddress(funcName));
-    // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot
-    // add new IR to the module
-    pJitMgr->mIsModuleFinalized = true;
-
-#if defined(KNOB_SWRC_TRACING)
-    char fName[1024];
-    snprintf(fName, sizeof(fName), "%s.bin", funcName.c_str());
-    FILE* fd = fopen(fName, "wb");
-    if (fd != nullptr)
-    {
-        // NOTE(review): dumps a fixed 2048 bytes starting at the function address,
-        // regardless of the actual code size - confirm this cannot over-read.
-        fwrite((void*)pfnFetch, 1, 2048, fd);
-        fclose(fd);
-    }
-#endif
-
-    pJitMgr->DumpAsm(const_cast<llvm::Function*>(func), "final");
-
-    return pfnFetch;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Builds the fetch shader IR for a fetch state and JITs it.
-/// @param hJitMgr - JitManager handle
-/// @param state - fetch state to build function from
-/// @return pointer to the compiled fetch function, as returned by JitFetchFunc
-extern "C" PFN_FETCH_FUNC JITCALL JitCompileFetch(HANDLE hJitMgr, const FETCH_COMPILE_STATE& state)
-{
-    JitManager* const pManager = reinterpret_cast<JitManager*>(hJitMgr);
-
-    // SetupNewModule() runs before the FetchJit is constructed on the manager.
-    pManager->SetupNewModule();
-
-    FetchJit fetchJit(pManager);
-    HANDLE   hFetchIR = fetchJit.Create(state);
-
-    return JitFetchFunc(hJitMgr, hFetchIR);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
deleted file mode 100644
index 9c4c6672184..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file fetch_jit.h
- *
- * @brief Definition of the fetch jitter
- *
- * Notes:
- *
- ******************************************************************************/
-#pragma once
-
-#include "common/formats.h"
-#include "core/state.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// INPUT_ELEMENT_DESC - one entry of FETCH_COMPILE_STATE::layout. The declared bitfield widths sum to 64 and share storage with 'bits'.
-//////////////////////////////////////////////////////////////////////////
-struct INPUT_ELEMENT_DESC
-{
- union
- {
- struct
- {
- uint32_t AlignedByteOffset : 12;
- uint32_t Format : 10;
- uint32_t StreamIndex : 6;
- uint32_t InstanceEnable : 1;
- uint32_t InstanceStrideEnable : 1;
- uint32_t ComponentControl0 : 4; // presumably a ComponentControl value for x - TODO confirm
- uint32_t ComponentControl1 : 4; // presumably a ComponentControl value for y - TODO confirm
- uint32_t ComponentControl2 : 4; // presumably a ComponentControl value for z - TODO confirm
- uint32_t ComponentControl3 : 4; // presumably a ComponentControl value for w - TODO confirm
- uint32_t ComponentPacking : 4; // set from ComponentEnable
- uint32_t _reserved : 14;
- };
- uint64_t bits; // raw view of the bitfields; FETCH_COMPILE_STATE::operator== compares this as a whole
- };
- uint32_t InstanceAdvancementState; // compared by FETCH_COMPILE_STATE::operator== only when InstanceEnable or InstanceStrideEnable is 1
-};
-
-// Component enable bitmask, used to set ComponentPacking: X, Y, Z and W are single bits, the other names are their combinations.
-enum ComponentEnable
-{
- NONE = 0x0,
- X = 0x1,
- Y = 0x2,
- XY = 0x3,
- Z = 0x4,
- XZ = 0x5,
- YZ = 0x6,
- XYZ = 0x7,
- W = 0x8,
- XW = 0x9,
- YW = 0xA,
- XYW = 0xB,
- ZW = 0xC,
- XZW = 0xD,
- YZW = 0xE,
- XYZW = 0xF,
-};
-
-enum ComponentControl
-{
- NoStore = 0, // FetchJit::GenerateCompCtrlVector yields an undefined vector
- StoreSrc = 1, // component comes from the gathered source data; not valid for GenerateCompCtrlVector
- Store0 = 2, // constant integer 0
- Store1Fp = 3, // constant 1.0f
- Store1Int = 4, // constant integer 1
- StoreVertexId = 5, // loaded from the fetch context's VertexID field(s)
- StoreInstanceId = 6, // fetch context's CurInstance, broadcast to every lane
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// State required for fetch shader jit compile.
-/// operator== compares every scalar option plus the first numAttribs
-/// entries of layout; layout entries past numAttribs are ignored.
-//////////////////////////////////////////////////////////////////////////
-struct FETCH_COMPILE_STATE
-{
-    uint32_t           numAttribs{0};
-    INPUT_ELEMENT_DESC layout[SWR_VTX_NUM_SLOTS];
-    // Value-initialized so that operator== never reads an indeterminate value
-    // when the caller has not assigned an index type.
-    SWR_FORMAT indexType{};
-    uint32_t   cutIndex{0xffffffff};
-
-    // Options that affect the JIT'd code
-    bool bDisableIndexOOBCheck;        // If enabled, FetchJit will exclude index OOB check
-    bool bEnableCutIndex{false};       // Compares indices with the cut index and returns a cut mask
-    bool bVertexIDOffsetEnable{false}; // Offset vertexID by StartVertex for non-indexed draws or
-                                       // BaseVertex for indexed draws
-    bool bPartialVertexBuffer{
-        false}; // for indexed draws, map illegal indices to a known resident vertex
-
-    bool bForceSequentialAccessEnable{false};
-    bool bInstanceIDOffsetEnable{false};
-
-    /// @param disableIndexOOBCheck - initial value of bDisableIndexOOBCheck
-    FETCH_COMPILE_STATE(bool disableIndexOOBCheck = false) :
-        bDisableIndexOOBCheck(disableIndexOOBCheck)
-    {
-    }
-
-    /// @brief Equality over everything that influences the generated code.
-    bool operator==(const FETCH_COMPILE_STATE& other) const
-    {
-        if (numAttribs != other.numAttribs)
-            return false;
-        if (indexType != other.indexType)
-            return false;
-        if (bDisableIndexOOBCheck != other.bDisableIndexOOBCheck)
-            return false;
-        if (bEnableCutIndex != other.bEnableCutIndex)
-            return false;
-        if (cutIndex != other.cutIndex)
-            return false;
-        if (bVertexIDOffsetEnable != other.bVertexIDOffsetEnable)
-            return false;
-        if (bPartialVertexBuffer != other.bPartialVertexBuffer)
-            return false;
-        if (bForceSequentialAccessEnable != other.bForceSequentialAccessEnable)
-            return false;
-        if (bInstanceIDOffsetEnable != other.bInstanceIDOffsetEnable)
-            return false;
-
-        for (uint32_t i = 0; i < numAttribs; ++i)
-        {
-            // InstanceAdvancementState only matters for instanced elements.
-            if ((layout[i].bits != other.layout[i].bits) ||
-                (((layout[i].InstanceEnable == 1) || (layout[i].InstanceStrideEnable == 1)) &&
-                 (layout[i].InstanceAdvancementState != other.layout[i].InstanceAdvancementState)))
-            {
-                return false;
-            }
-        }
-
-        return true;
-    }
-};
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
deleted file mode 100644
index 61c6b57b38b..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ /dev/null
@@ -1,962 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file lower_x86.cpp
- *
- * @brief llvm pass to lower meta code to x86
- *
- * Notes:
- *
- ******************************************************************************/
-
-#include "jit_pch.hpp"
-#include "passes.h"
-#include "JitManager.h"
-
-#include "common/simdlib.hpp"
-
-#include <unordered_map>
-
-extern "C" void ScatterPS_256(uint8_t*, SIMD256::Integer, SIMD256::Float, uint8_t, uint32_t);
-
-namespace llvm
-{
- // forward declare the initializer
- void initializeLowerX86Pass(PassRegistry&);
-} // namespace llvm
-
-namespace SwrJit
-{
- using namespace llvm;
-
- enum TargetArch
- {
- AVX = 0, // values index the outer vector returned by getIntrinsicMapAdvanced()
- AVX2 = 1,
- AVX512 = 2
- };
-
- enum TargetWidth
- {
- W256 = 0, // values index X86Intrinsic::intrin
- W512 = 1,
- NUM_WIDTHS = 2 // array size of X86Intrinsic::intrin, not a real width
- };
-
- struct LowerX86;
-
- typedef std::function<Instruction*(LowerX86*, TargetArch, TargetWidth, CallInst*)> EmuFunc;
-
- struct X86Intrinsic
- {
- IntrinsicID intrin[NUM_WIDTHS]; // per-TargetWidth native intrinsic; Intrinsic::not_intrinsic selects emuFunc, DOUBLE selects double pumping
- EmuFunc emuFunc; // called by ProcessIntrinsicAdvanced when the selected slot is Intrinsic::not_intrinsic
- };
-
- // Legacy lookup table for intrinsics that haven't been moved to the new mechanism yet.
- // Entries found here keep the previous behavior of mapping directly to avx/avx2 intrinsics.
- using intrinsicMap_t = std::map<std::string, IntrinsicID>;
- static intrinsicMap_t& getIntrinsicMap()
- {
-     // Function-local static: built once, on first use.
-     static intrinsicMap_t sLegacyIntrinsics = {
-         {"meta.intrinsic.BEXTR_32",  Intrinsic::x86_bmi_bextr_32},
-         {"meta.intrinsic.VPSHUFB",   Intrinsic::x86_avx2_pshuf_b},
-         {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
-         {"meta.intrinsic.VPTESTC",   Intrinsic::x86_avx_ptestc_256},
-         {"meta.intrinsic.VPTESTZ",   Intrinsic::x86_avx_ptestz_256},
-         {"meta.intrinsic.VPHADDD",   Intrinsic::x86_avx2_phadd_d},
-         {"meta.intrinsic.PDEP32",    Intrinsic::x86_bmi_pdep_32},
-         {"meta.intrinsic.RDTSC",     Intrinsic::x86_rdtsc},
-     };
-
-     return sLegacyIntrinsics;
- }
-
- // Forward decls
- Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
- Instruction*
- VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
- Instruction*
- VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
- Instruction*
- VSCATTER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
- Instruction*
- VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
- Instruction*
- VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
- Instruction*
- VCONVERT_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
-
- Instruction* DOUBLE_EMU(LowerX86* pThis,
- TargetArch arch,
- TargetWidth width,
- CallInst* pCallInst,
- Intrinsic::ID intrin);
-
- static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1; // sentinel table value: ProcessIntrinsicAdvanced double pumps the next smaller width's intrinsic
-
- using intrinsicMapAdvanced_t = std::vector<std::map<std::string, X86Intrinsic>>;
-
- static intrinsicMapAdvanced_t& getIntrinsicMapAdvanced()
- {
- // clang-format off
- static intrinsicMapAdvanced_t intrinsicMapAdvanced = {
- // One map per TargetArch value; each entry is {{256 wide intrinsic, 512 wide intrinsic}, emulation function}
- {
- // AVX (index 0 == TargetArch AVX)
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
- {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
- },
- {
- // AVX2 (index 1 == TargetArch AVX2)
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
- },
- {
- // AVX512 (index 2 == TargetArch AVX512)
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}},
- #if LLVM_VERSION_MAJOR < 7
- {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx512_mask_permvar_sf_256, Intrinsic::x86_avx512_mask_permvar_sf_512}, NO_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx512_mask_permvar_si_256, Intrinsic::x86_avx512_mask_permvar_si_512}, NO_EMU}},
- #else
- {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- #endif
- {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
- #if LLVM_VERSION_MAJOR < 7
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512}, NO_EMU}},
- #else
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VCONVERT_EMU}},
- #endif
- {"meta.intrinsic.VROUND", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}},
- {"meta.intrinsic.VHSUBPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}}
- }};
- // clang-format on
- return intrinsicMapAdvanced;
- }
-
- // Total size in bits of a vector type (element count times element size).
- // LLVM releases before 11 provide this directly as VectorType::getBitWidth().
- static uint32_t getBitWidth(VectorType *pVTy)
- {
-#if LLVM_VERSION_MAJOR < 11
-     return pVTy->getBitWidth();
-#else
-#if LLVM_VERSION_MAJOR >= 12
-     // From LLVM 12 on the element count is read through FixedVectorType.
-     return cast<FixedVectorType>(pVTy)->getNumElements() *
-            pVTy->getElementType()->getPrimitiveSizeInBits();
-#else
-     return pVTy->getNumElements() * pVTy->getElementType()->getPrimitiveSizeInBits();
-#endif
-#endif
- }
-
- struct LowerX86 : public FunctionPass
- {
- /// @brief Registers the pass, selects the target architecture and declares
- ///        the ScatterPS_256 helper in the current module.
- /// @param b - builder used to emit replacement IR. The default of nullptr
- ///            produces a pass object without a builder; the original code
- ///            dereferenced it unconditionally.
- LowerX86(Builder* b = nullptr) : FunctionPass(ID), B(b)
- {
-     initializeLowerX86Pass(*PassRegistry::getPassRegistry());
-
-     // Without a builder there is no module or width to set up; leave the
-     // members in a defined state instead of dereferencing a null pointer.
-     if (B == nullptr)
-     {
-         mTarget        = AVX;
-         mPfnScatter256 = nullptr;
-         return;
-     }
-
-     // Determine target arch
-     if (JM()->mArch.AVX512F())
-     {
-         mTarget = AVX512;
-     }
-     else if (JM()->mArch.AVX2())
-     {
-         mTarget = AVX2;
-     }
-     else if (JM()->mArch.AVX())
-     {
-         mTarget = AVX;
-     }
-     else
-     {
-         SWR_ASSERT(false, "Unsupported AVX architecture.");
-         mTarget = AVX;
-     }
-
-     // Setup scatter function for 256 wide. The builder's width is switched to 8
-     // while the argument types are read, then restored below.
-     uint32_t curWidth = B->mVWidth;
-     B->SetTargetWidth(8);
-     std::vector<Type*> args = {
-         B->mInt8PtrTy,   // pBase
-         B->mSimdInt32Ty, // vIndices
-         B->mSimdFP32Ty,  // vSrc
-         B->mInt8Ty,      // mask
-         B->mInt32Ty      // scale
-     };
-
-     FunctionType* pfnScatterTy = FunctionType::get(B->mVoidTy, args, false);
-     mPfnScatter256 = cast<Function>(
-#if LLVM_VERSION_MAJOR >= 9
-         B->JM()->mpCurrentModule->getOrInsertFunction("ScatterPS_256", pfnScatterTy).getCallee());
-#else
-         B->JM()->mpCurrentModule->getOrInsertFunction("ScatterPS_256", pfnScatterTy));
-#endif
-     // Register the host implementation once so the JIT can resolve the symbol.
-     if (sys::DynamicLibrary::SearchForAddressOfSymbol("ScatterPS_256") == nullptr)
-     {
-         sys::DynamicLibrary::AddSymbol("ScatterPS_256", (void*)&ScatterPS_256);
-     }
-
-     B->SetTargetWidth(curWidth);
- }
-
- // Best-effort deduction of the vector width and element type an intrinsic call
- // operates on. This does not work properly across all intrinsics, and will have
- // to be rethought. Probably need something similar to llvm's getDeclaration()
- // utility to map a set of inputs to a specific typed intrinsic.
- //   pCallInst  - the meta intrinsic call
- //   intrinName - name of the called function
- //   pWidth     - [out] W256 or W512 (W256 after an assert for other widths)
- //   pTy        - [out] scalar type of the chosen vector type
- void GetRequestedWidthAndType(CallInst*       pCallInst,
-                               const StringRef intrinName,
-                               TargetWidth*    pWidth,
-                               Type**          pTy)
- {
-     assert(pCallInst);
-
-     Type* pVecTy = nullptr;
-     if (intrinName.equals("meta.intrinsic.VCVTPD2PS"))
-     {
-         // VCVTPD2PS type comes from src, not dst
-         Value* pSrc = pCallInst->getOperand(0);
-         assert(pSrc);
-         pVecTy = pSrc->getType();
-     }
-     else
-     {
-         pVecTy = pCallInst->getType();
-     }
-
-     // Fall back to the first vector-typed argument when the type chosen
-     // above is not a vector.
-     if (!pVecTy->isVectorTy())
-     {
-         for (auto& operand : pCallInst->arg_operands())
-         {
-             Type* pOperandTy = operand.get()->getType();
-             if (pOperandTy->isVectorTy())
-             {
-                 pVecTy = pOperandTy;
-                 break;
-             }
-         }
-     }
-     SWR_ASSERT(pVecTy->isVectorTy(), "Couldn't determine vector size");
-
-     const uint32_t width = getBitWidth(cast<VectorType>(pVecTy));
-     if (width == 256)
-     {
-         *pWidth = W256;
-     }
-     else if (width == 512)
-     {
-         *pWidth = W512;
-     }
-     else
-     {
-         SWR_ASSERT(false, "Unhandled vector width %d", width);
-         *pWidth = W256;
-     }
-
-     *pTy = pVecTy->getScalarType();
- }
-
- // Returns an all-zero vector of pTy elements: 8 elements for W256,
- // 16 for W512. Any other width asserts and requests 0 elements.
- Value* GetZeroVec(TargetWidth width, Type* pTy)
- {
-     uint32_t laneCount = 0;
-
-     if (width == W256)
-     {
-         laneCount = 8;
-     }
-     else if (width == W512)
-     {
-         laneCount = 16;
-     }
-     else
-     {
-         SWR_ASSERT(false, "Unhandled vector width type %d\n", width);
-     }
-
-     Type* pZeroTy = getVectorType(pTy, laneCount);
-     return ConstantVector::getNullValue(pZeroTy);
- }
-
- // Returns an all-ones mask constant for the given width: an 8-bit constant
- // for W256 and a 16-bit constant for W512. Any other width asserts and
- // returns nullptr (the original returned an uninitialized pointer).
- Value* GetMask(TargetWidth width)
- {
-     Value* mask = nullptr;
-     switch (width)
-     {
-     case W256:
-         mask = B->C((uint8_t)-1);
-         break;
-     case W512:
-         mask = B->C((uint16_t)-1);
-         break;
-     default:
-         SWR_ASSERT(false, "Unhandled vector width type %d\n", width);
-         break;
-     }
-     return mask;
- }
-
- // Widen an <N x i1> mask to the <N x i32> form by sign-extending every
- // lane, keeping the lane count of the input.
- Value* VectorMask(Value* vi1Mask)
- {
-     Type* const pMaskTy = vi1Mask->getType();
-
-     // The way to read the lane count depends on the LLVM release.
-#if LLVM_VERSION_MAJOR >= 12
-     const uint32_t laneCount = cast<FixedVectorType>(pMaskTy)->getNumElements();
-#elif LLVM_VERSION_MAJOR >= 11
-     const uint32_t laneCount = cast<VectorType>(pMaskTy)->getNumElements();
-#else
-     const uint32_t laneCount = pMaskTy->getVectorNumElements();
-#endif
-
-     Type* const pWideMaskTy = getVectorType(B->mInt32Ty, laneCount);
-     return B->S_EXT(vi1Mask, pWideMaskTy);
- }
-
- // Lowers a meta intrinsic listed in getIntrinsicMapAdvanced() for the current
- // target: calls the native intrinsic when the table names one, double pumps
- // the next narrower intrinsic for DOUBLE entries, and otherwise calls the
- // entry's emulation function.
- // Returns the replacement instruction, or nullptr (after asserting) when the
- // called function has no table entry. The table is only read: the original
- // used operator[], which inserts an empty entry on a miss.
- Instruction* ProcessIntrinsicAdvanced(CallInst* pCallInst)
- {
-     Function* pFunc = pCallInst->getCalledFunction();
-     assert(pFunc);
-
-     const std::string funcName = pFunc->getName().str();
-
-     const auto& archMap = getIntrinsicMapAdvanced()[mTarget];
-     const auto  entry   = archMap.find(funcName);
-     SWR_ASSERT(entry != archMap.end(), "Unimplemented intrinsic %s.", funcName.c_str());
-     if (entry == archMap.end())
-     {
-         return nullptr;
-     }
-     const X86Intrinsic& intrinsic = entry->second;
-
-     TargetWidth vecWidth;
-     Type*       pElemTy;
-     GetRequestedWidthAndType(pCallInst, pFunc->getName(), &vecWidth, &pElemTy);
-
-     // Check if there is a native intrinsic for this instruction
-     IntrinsicID id = intrinsic.intrin[vecWidth];
-     if (id == DOUBLE)
-     {
-         // Double pump the next smaller SIMD intrinsic
-         SWR_ASSERT(vecWidth != 0, "Cannot double pump smallest SIMD width.");
-         Intrinsic::ID id2 = intrinsic.intrin[vecWidth - 1];
-         SWR_ASSERT(id2 != Intrinsic::not_intrinsic,
-                    "Cannot find intrinsic to double pump.");
-         return DOUBLE_EMU(this, mTarget, vecWidth, pCallInst, id2);
-     }
-
-     if (id == Intrinsic::not_intrinsic)
-     {
-         // No native intrinsic, call emulation function
-         return intrinsic.emuFunc(this, mTarget, vecWidth, pCallInst);
-     }
-
-     Function*              pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id);
-     SmallVector<Value*, 8> args;
-     for (auto& arg : pCallInst->arg_operands())
-     {
-         args.push_back(arg.get());
-     }
-
-     // If AVX512, all instructions add a src operand and mask. We'll pass in 0 src and
-     // full mask for now. Assuming the intrinsics are consistent and place the src
-     // operand and mask last in the argument list.
-     if (mTarget == AVX512)
-     {
-         if (pFunc->getName().equals("meta.intrinsic.VCVTPD2PS"))
-         {
-             args.push_back(GetZeroVec(W256, pCallInst->getType()->getScalarType()));
-             args.push_back(GetMask(W256));
-             // for AVX512 VCVTPD2PS, we also have to add rounding mode
-             args.push_back(B->C(_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
-         }
-         else
-         {
-             args.push_back(GetZeroVec(vecWidth, pElemTy));
-             args.push_back(GetMask(vecWidth));
-         }
-     }
-
-     return B->CALLA(pIntrin, args);
- }
-
- Instruction* ProcessIntrinsic(CallInst* pCallInst)
- {
- Function* pFunc = pCallInst->getCalledFunction();
- assert(pFunc);
-
- // Forward to the advanced support if found
- if (getIntrinsicMapAdvanced()[mTarget].find(pFunc->getName().str()) != getIntrinsicMapAdvanced()[mTarget].end())
- {
- return ProcessIntrinsicAdvanced(pCallInst);
- }
-
- SWR_ASSERT(getIntrinsicMap().find(pFunc->getName().str()) != getIntrinsicMap().end(),
- "Unimplemented intrinsic %s.",
- pFunc->getName().str().c_str());
-
- Intrinsic::ID x86Intrinsic = getIntrinsicMap()[pFunc->getName().str()];
- Function* pX86IntrinFunc =
- Intrinsic::getDeclaration(B->JM()->mpCurrentModule, x86Intrinsic);
-
- SmallVector<Value*, 8> args;
- for (auto& arg : pCallInst->arg_operands())
- {
- args.push_back(arg.get());
- }
- return B->CALLA(pX86IntrinFunc, args);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief LLVM function pass run method.
- /// @param f- The function we're working on with this pass.
- virtual bool runOnFunction(Function& F)
- {
- std::vector<Instruction*> toRemove;
- std::vector<BasicBlock*> bbs;
-
- // Make temp copy of the basic blocks and instructions, as the intrinsic
- // replacement code might invalidate the iterators
- for (auto& b : F.getBasicBlockList())
- {
- bbs.push_back(&b);
- }
-
- for (auto* BB : bbs)
- {
- std::vector<Instruction*> insts;
- for (auto& i : BB->getInstList())
- {
- insts.push_back(&i);
- }
-
- for (auto* I : insts)
- {
- if (CallInst* pCallInst = dyn_cast<CallInst>(I))
- {
- Function* pFunc = pCallInst->getCalledFunction();
- if (pFunc)
- {
- if (pFunc->getName().startswith("meta.intrinsic"))
- {
- B->IRB()->SetInsertPoint(I);
- Instruction* pReplace = ProcessIntrinsic(pCallInst);
- toRemove.push_back(pCallInst);
- if (pReplace)
- {
- pCallInst->replaceAllUsesWith(pReplace);
- }
- }
- }
- }
- }
- }
-
- for (auto* pInst : toRemove)
- {
- pInst->eraseFromParent();
- }
-
- JitManager::DumpToFile(&F, "lowerx86");
-
- return true;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage& AU) const {}
-
- JitManager* JM() { return B->JM(); }
- Builder* B;
- TargetArch mTarget;
- Function* mPfnScatter256;
-
- static char ID; ///< Needed by LLVM to generate ID for FunctionPass.
- };
-
- char LowerX86::ID = 0; // LLVM uses address of ID as the actual ID.
-
- FunctionPass* createLowerX86Pass(Builder* b) { return new LowerX86(b); }
-
- Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
- {
- SWR_ASSERT(false, "Unimplemented intrinsic emulation.");
- return nullptr;
- }
-
- Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
- {
- // Only need vperm emulation for AVX
- SWR_ASSERT(arch == AVX);
-
- Builder* B = pThis->B;
- auto v32A = pCallInst->getArgOperand(0);
- auto vi32Index = pCallInst->getArgOperand(1);
-
- Value* v32Result;
- if (isa<Constant>(vi32Index))
- {
- // Can use llvm shuffle vector directly with constant shuffle indices
- v32Result = B->VSHUFFLE(v32A, v32A, vi32Index);
- }
- else
- {
- v32Result = UndefValue::get(v32A->getType());
-#if LLVM_VERSION_MAJOR >= 12
- uint32_t numElem = cast<FixedVectorType>(v32A->getType())->getNumElements();
-#elif LLVM_VERSION_MAJOR >= 11
- uint32_t numElem = cast<VectorType>(v32A->getType())->getNumElements();
-#else
- uint32_t numElem = v32A->getType()->getVectorNumElements();
-#endif
- for (uint32_t l = 0; l < numElem; ++l)
- {
- auto i32Index = B->VEXTRACT(vi32Index, B->C(l));
- auto val = B->VEXTRACT(v32A, i32Index);
- v32Result = B->VINSERT(v32Result, val, B->C(l));
- }
- }
- return cast<Instruction>(v32Result);
- }
-
- Instruction*
- VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
- {
- Builder* B = pThis->B;
- auto vSrc = pCallInst->getArgOperand(0);
- auto pBase = pCallInst->getArgOperand(1);
- auto vi32Indices = pCallInst->getArgOperand(2);
- auto vi1Mask = pCallInst->getArgOperand(3);
- auto i8Scale = pCallInst->getArgOperand(4);
-
- pBase = B->POINTER_CAST(pBase, PointerType::get(B->mInt8Ty, 0));
-#if LLVM_VERSION_MAJOR >= 11
-#if LLVM_VERSION_MAJOR >= 12
- FixedVectorType* pVectorType = cast<FixedVectorType>(vSrc->getType());
-#else
- VectorType* pVectorType = cast<VectorType>(vSrc->getType());
-#endif
- uint32_t numElem = pVectorType->getNumElements();
- auto srcTy = pVectorType->getElementType();
-#else
- uint32_t numElem = vSrc->getType()->getVectorNumElements();
- auto srcTy = vSrc->getType()->getVectorElementType();
-#endif
- auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
-
- Value* v32Gather = nullptr;
- if (arch == AVX)
- {
- // Full emulation for AVX
- // Store source on stack to provide a valid address to load from inactive lanes
- auto pStack = B->STACKSAVE();
- auto pTmp = B->ALLOCA(vSrc->getType());
- B->STORE(vSrc, pTmp);
-
- v32Gather = UndefValue::get(vSrc->getType());
-#if LLVM_VERSION_MAJOR <= 10
- auto vi32Scale = ConstantVector::getSplat(numElem, cast<ConstantInt>(i32Scale));
-#elif LLVM_VERSION_MAJOR == 11
- auto vi32Scale = ConstantVector::getSplat(ElementCount(numElem, false), cast<ConstantInt>(i32Scale));
-#else
- auto vi32Scale = ConstantVector::getSplat(ElementCount::get(numElem, false), cast<ConstantInt>(i32Scale));
-#endif
- auto vi32Offsets = B->MUL(vi32Indices, vi32Scale);
-
- for (uint32_t i = 0; i < numElem; ++i)
- {
- auto i32Offset = B->VEXTRACT(vi32Offsets, B->C(i));
- auto pLoadAddress = B->GEP(pBase, i32Offset);
- pLoadAddress = B->BITCAST(pLoadAddress, PointerType::get(srcTy, 0));
- auto pMaskedLoadAddress = B->GEP(pTmp, {0, i});
- auto i1Mask = B->VEXTRACT(vi1Mask, B->C(i));
- auto pValidAddress = B->SELECT(i1Mask, pLoadAddress, pMaskedLoadAddress);
- auto val = B->LOAD(pValidAddress);
- v32Gather = B->VINSERT(v32Gather, val, B->C(i));
- }
-
- B->STACKRESTORE(pStack);
- }
- else if (arch == AVX2 || (arch == AVX512 && width == W256))
- {
- Function* pX86IntrinFunc = nullptr;
- if (srcTy == B->mFP32Ty)
- {
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
- Intrinsic::x86_avx2_gather_d_ps_256);
- }
- else if (srcTy == B->mInt32Ty)
- {
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
- Intrinsic::x86_avx2_gather_d_d_256);
- }
- else if (srcTy == B->mDoubleTy)
- {
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
- Intrinsic::x86_avx2_gather_d_q_256);
- }
- else
- {
- SWR_ASSERT(false, "Unsupported vector element type for gather.");
- }
-
- if (width == W256)
- {
- auto v32Mask = B->BITCAST(pThis->VectorMask(vi1Mask), vSrc->getType());
- v32Gather = B->CALL(pX86IntrinFunc, {vSrc, pBase, vi32Indices, v32Mask, i8Scale});
- }
- else if (width == W512)
- {
- // Double pump 4-wide for 64bit elements
-#if LLVM_VERSION_MAJOR >= 12
- if (cast<FixedVectorType>(vSrc->getType())->getElementType() == B->mDoubleTy)
-#elif LLVM_VERSION_MAJOR >= 11
- if (cast<VectorType>(vSrc->getType())->getElementType() == B->mDoubleTy)
-#else
- if (vSrc->getType()->getVectorElementType() == B->mDoubleTy)
-#endif
- {
- auto v64Mask = pThis->VectorMask(vi1Mask);
-#if LLVM_VERSION_MAJOR >= 12
- uint32_t numElem = cast<FixedVectorType>(v64Mask->getType())->getNumElements();
-#elif LLVM_VERSION_MAJOR >= 11
- uint32_t numElem = cast<VectorType>(v64Mask->getType())->getNumElements();
-#else
- uint32_t numElem = v64Mask->getType()->getVectorNumElements();
-#endif
- v64Mask = B->S_EXT(v64Mask, getVectorType(B->mInt64Ty, numElem));
- v64Mask = B->BITCAST(v64Mask, vSrc->getType());
-
- Value* src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3}));
- Value* src1 = B->VSHUFFLE(vSrc, vSrc, B->C({4, 5, 6, 7}));
-
- Value* indices0 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3}));
- Value* indices1 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({4, 5, 6, 7}));
-
- Value* mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({0, 1, 2, 3}));
- Value* mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({4, 5, 6, 7}));
-
-#if LLVM_VERSION_MAJOR >= 12
- uint32_t numElemSrc0 = cast<FixedVectorType>(src0->getType())->getNumElements();
- uint32_t numElemMask0 = cast<FixedVectorType>(mask0->getType())->getNumElements();
- uint32_t numElemSrc1 = cast<FixedVectorType>(src1->getType())->getNumElements();
- uint32_t numElemMask1 = cast<FixedVectorType>(mask1->getType())->getNumElements();
-#elif LLVM_VERSION_MAJOR >= 11
- uint32_t numElemSrc0 = cast<VectorType>(src0->getType())->getNumElements();
- uint32_t numElemMask0 = cast<VectorType>(mask0->getType())->getNumElements();
- uint32_t numElemSrc1 = cast<VectorType>(src1->getType())->getNumElements();
- uint32_t numElemMask1 = cast<VectorType>(mask1->getType())->getNumElements();
-#else
- uint32_t numElemSrc0 = src0->getType()->getVectorNumElements();
- uint32_t numElemMask0 = mask0->getType()->getVectorNumElements();
- uint32_t numElemSrc1 = src1->getType()->getVectorNumElements();
- uint32_t numElemMask1 = mask1->getType()->getVectorNumElements();
-#endif
- src0 = B->BITCAST(src0, getVectorType(B->mInt64Ty, numElemSrc0));
- mask0 = B->BITCAST(mask0, getVectorType(B->mInt64Ty, numElemMask0));
- Value* gather0 =
- B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
- src1 = B->BITCAST(src1, getVectorType(B->mInt64Ty, numElemSrc1));
- mask1 = B->BITCAST(mask1, getVectorType(B->mInt64Ty, numElemMask1));
- Value* gather1 =
- B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
- v32Gather = B->VSHUFFLE(gather0, gather1, B->C({0, 1, 2, 3, 4, 5, 6, 7}));
- v32Gather = B->BITCAST(v32Gather, vSrc->getType());
- }
- else
- {
- // Double pump 8-wide for 32bit elements
- auto v32Mask = pThis->VectorMask(vi1Mask);
- v32Mask = B->BITCAST(v32Mask, vSrc->getType());
- Value* src0 = B->EXTRACT_16(vSrc, 0);
- Value* src1 = B->EXTRACT_16(vSrc, 1);
-
- Value* indices0 = B->EXTRACT_16(vi32Indices, 0);
- Value* indices1 = B->EXTRACT_16(vi32Indices, 1);
-
- Value* mask0 = B->EXTRACT_16(v32Mask, 0);
- Value* mask1 = B->EXTRACT_16(v32Mask, 1);
-
- Value* gather0 =
- B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
- Value* gather1 =
- B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
-
- v32Gather = B->JOIN_16(gather0, gather1);
- }
- }
- }
- else if (arch == AVX512)
- {
- Value* iMask = nullptr;
- Function* pX86IntrinFunc = nullptr;
- if (srcTy == B->mFP32Ty)
- {
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
- Intrinsic::x86_avx512_gather_dps_512);
- iMask = B->BITCAST(vi1Mask, B->mInt16Ty);
- }
- else if (srcTy == B->mInt32Ty)
- {
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
- Intrinsic::x86_avx512_gather_dpi_512);
- iMask = B->BITCAST(vi1Mask, B->mInt16Ty);
- }
- else if (srcTy == B->mDoubleTy)
- {
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
- Intrinsic::x86_avx512_gather_dpd_512);
- iMask = B->BITCAST(vi1Mask, B->mInt8Ty);
- }
- else
- {
- SWR_ASSERT(false, "Unsupported vector element type for gather.");
- }
-
- auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
- v32Gather = B->CALL(pX86IntrinFunc, {vSrc, pBase, vi32Indices, iMask, i32Scale});
- }
-
- return cast<Instruction>(v32Gather);
- }
- Instruction*
- VSCATTER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
- {
- Builder* B = pThis->B;
- auto pBase = pCallInst->getArgOperand(0);
- auto vi1Mask = pCallInst->getArgOperand(1);
- auto vi32Indices = pCallInst->getArgOperand(2);
- auto v32Src = pCallInst->getArgOperand(3);
- auto i32Scale = pCallInst->getArgOperand(4);
-
- if (arch != AVX512)
- {
- // Call into C function to do the scatter. This has significantly better compile perf
- // compared to jitting scatter loops for every scatter
- if (width == W256)
- {
- auto mask = B->BITCAST(vi1Mask, B->mInt8Ty);
- B->CALL(pThis->mPfnScatter256, {pBase, vi32Indices, v32Src, mask, i32Scale});
- }
- else
- {
- // Need to break up 512 wide scatter to two 256 wide
- auto maskLo = B->VSHUFFLE(vi1Mask, vi1Mask, B->C({0, 1, 2, 3, 4, 5, 6, 7}));
- auto indicesLo =
- B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3, 4, 5, 6, 7}));
- auto srcLo = B->VSHUFFLE(v32Src, v32Src, B->C({0, 1, 2, 3, 4, 5, 6, 7}));
-
- auto mask = B->BITCAST(maskLo, B->mInt8Ty);
- B->CALL(pThis->mPfnScatter256, {pBase, indicesLo, srcLo, mask, i32Scale});
-
- auto maskHi = B->VSHUFFLE(vi1Mask, vi1Mask, B->C({8, 9, 10, 11, 12, 13, 14, 15}));
- auto indicesHi =
- B->VSHUFFLE(vi32Indices, vi32Indices, B->C({8, 9, 10, 11, 12, 13, 14, 15}));
- auto srcHi = B->VSHUFFLE(v32Src, v32Src, B->C({8, 9, 10, 11, 12, 13, 14, 15}));
-
- mask = B->BITCAST(maskHi, B->mInt8Ty);
- B->CALL(pThis->mPfnScatter256, {pBase, indicesHi, srcHi, mask, i32Scale});
- }
- return nullptr;
- }
-
- Value* iMask;
- Function* pX86IntrinFunc;
- if (width == W256)
- {
- // No direct intrinsic supported in llvm to scatter 8 elem with 32bit indices, but we
- // can use the scatter of 8 elements with 64bit indices
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
- Intrinsic::x86_avx512_scatter_qps_512);
-
- auto vi32IndicesExt = B->Z_EXT(vi32Indices, B->mSimdInt64Ty);
- iMask = B->BITCAST(vi1Mask, B->mInt8Ty);
- B->CALL(pX86IntrinFunc, {pBase, iMask, vi32IndicesExt, v32Src, i32Scale});
- }
- else if (width == W512)
- {
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
- Intrinsic::x86_avx512_scatter_dps_512);
- iMask = B->BITCAST(vi1Mask, B->mInt16Ty);
- B->CALL(pX86IntrinFunc, {pBase, iMask, vi32Indices, v32Src, i32Scale});
- }
- return nullptr;
- }
-
- // No support for vroundps in avx512 (it is available in kncni), so emulate with avx
- // instructions
- Instruction*
- VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
- {
- SWR_ASSERT(arch == AVX512);
-
- auto B = pThis->B;
- auto vf32Src = pCallInst->getOperand(0);
- assert(vf32Src);
- auto i8Round = pCallInst->getOperand(1);
- assert(i8Round);
- auto pfnFunc =
- Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_round_ps_256);
-
- if (width == W256)
- {
- return cast<Instruction>(B->CALL2(pfnFunc, vf32Src, i8Round));
- }
- else if (width == W512)
- {
- auto v8f32SrcLo = B->EXTRACT_16(vf32Src, 0);
- auto v8f32SrcHi = B->EXTRACT_16(vf32Src, 1);
-
- auto v8f32ResLo = B->CALL2(pfnFunc, v8f32SrcLo, i8Round);
- auto v8f32ResHi = B->CALL2(pfnFunc, v8f32SrcHi, i8Round);
-
- return cast<Instruction>(B->JOIN_16(v8f32ResLo, v8f32ResHi));
- }
- else
- {
- SWR_ASSERT(false, "Unimplemented vector width.");
- }
-
- return nullptr;
- }
-
- Instruction*
- VCONVERT_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
- {
- SWR_ASSERT(arch == AVX512);
-
- auto B = pThis->B;
- auto vf32Src = pCallInst->getOperand(0);
-
- if (width == W256)
- {
- auto vf32SrcRound = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
- Intrinsic::x86_avx_round_ps_256);
- return cast<Instruction>(B->FP_TRUNC(vf32SrcRound, B->mFP32Ty));
- }
- else if (width == W512)
- {
- // 512 can use intrinsic
- auto pfnFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
- Intrinsic::x86_avx512_mask_cvtpd2ps_512);
- return cast<Instruction>(B->CALL(pfnFunc, vf32Src));
- }
- else
- {
- SWR_ASSERT(false, "Unimplemented vector width.");
- }
-
- return nullptr;
- }
-
- // No support for hsub in AVX512
- Instruction* VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
- {
- SWR_ASSERT(arch == AVX512);
-
- auto B = pThis->B;
- auto src0 = pCallInst->getOperand(0);
- auto src1 = pCallInst->getOperand(1);
-
- // 256b hsub can just use avx intrinsic
- if (width == W256)
- {
- auto pX86IntrinFunc =
- Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_hsub_ps_256);
- return cast<Instruction>(B->CALL2(pX86IntrinFunc, src0, src1));
- }
- else if (width == W512)
- {
- // 512b hsub can be accomplished with shuf/sub combo
- auto minuend = B->VSHUFFLE(src0, src1, B->C({0, 2, 8, 10, 4, 6, 12, 14}));
- auto subtrahend = B->VSHUFFLE(src0, src1, B->C({1, 3, 9, 11, 5, 7, 13, 15}));
- return cast<Instruction>(B->SUB(minuend, subtrahend));
- }
- else
- {
- SWR_ASSERT(false, "Unimplemented vector width.");
- return nullptr;
- }
- }
-
- // Double pump input using Intrin template arg. This blindly extracts lower and upper 256 from
- // each vector argument and calls the 256 wide intrinsic, then merges the results to 512 wide
- Instruction* DOUBLE_EMU(LowerX86* pThis,
- TargetArch arch,
- TargetWidth width,
- CallInst* pCallInst,
- Intrinsic::ID intrin)
- {
- auto B = pThis->B;
- SWR_ASSERT(width == W512);
- Value* result[2];
- Function* pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin);
- for (uint32_t i = 0; i < 2; ++i)
- {
- SmallVector<Value*, 8> args;
- for (auto& arg : pCallInst->arg_operands())
- {
- auto argType = arg.get()->getType();
- if (argType->isVectorTy())
- {
-#if LLVM_VERSION_MAJOR >= 12
- uint32_t vecWidth = cast<FixedVectorType>(argType)->getNumElements();
- auto elemTy = cast<FixedVectorType>(argType)->getElementType();
-#elif LLVM_VERSION_MAJOR >= 11
- uint32_t vecWidth = cast<VectorType>(argType)->getNumElements();
- auto elemTy = cast<VectorType>(argType)->getElementType();
-#else
- uint32_t vecWidth = argType->getVectorNumElements();
- auto elemTy = argType->getVectorElementType();
-#endif
- Value* lanes = B->CInc<int>(i * vecWidth / 2, vecWidth / 2);
- Value* argToPush = B->VSHUFFLE(arg.get(), B->VUNDEF(elemTy, vecWidth), lanes);
- args.push_back(argToPush);
- }
- else
- {
- args.push_back(arg.get());
- }
- }
- result[i] = B->CALLA(pX86IntrinFunc, args);
- }
- uint32_t vecWidth;
- if (result[0]->getType()->isVectorTy())
- {
- assert(result[1]->getType()->isVectorTy());
-#if LLVM_VERSION_MAJOR >= 12
- vecWidth = cast<FixedVectorType>(result[0]->getType())->getNumElements() +
- cast<FixedVectorType>(result[1]->getType())->getNumElements();
-#elif LLVM_VERSION_MAJOR >= 11
- vecWidth = cast<VectorType>(result[0]->getType())->getNumElements() +
- cast<VectorType>(result[1]->getType())->getNumElements();
-#else
- vecWidth = result[0]->getType()->getVectorNumElements() +
- result[1]->getType()->getVectorNumElements();
-#endif
- }
- else
- {
- vecWidth = 2;
- }
- Value* lanes = B->CInc<int>(0, vecWidth);
- return cast<Instruction>(B->VSHUFFLE(result[0], result[1], lanes));
- }
-
-} // namespace SwrJit
-
-using namespace SwrJit;
-
-INITIALIZE_PASS_BEGIN(LowerX86, "LowerX86", "LowerX86", false, false)
-INITIALIZE_PASS_END(LowerX86, "LowerX86", "LowerX86", false, false)
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h
deleted file mode 100644
index e0bb75cdec9..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file passes.h
- *
- * @brief Include file for llvm passes
- *
- ******************************************************************************/
-#pragma once
-
-#include "JitManager.h"
-#include "builder.h"
-
-namespace SwrJit
-{
- using namespace llvm;
-
- FunctionPass* createLowerX86Pass(Builder* b);
-} // namespace SwrJit
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h b/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h
deleted file mode 100644
index dcb051c3b53..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file jit_api.h
- *
- * @brief Platform independent JIT interface
- *
- * Notes:
- *
- ******************************************************************************/
-#pragma once
-#include "common/os.h"
-#include "core/utils.h"
-
-#include "fetch_jit.h"
-#include "streamout_jit.h"
-#include "blend_jit.h"
-
-#include <stdlib.h>
-
-#if defined(_WIN32)
-#define EXCEPTION_PRINT_STACK(ret) ret
-#endif // _WIN32
-
-#if defined(_WIN32)
-#define JITCALL __stdcall
-#else
-#define JITCALL
-#endif
-
-
-struct ShaderInfo;
-
-//////////////////////////////////////////////////////////////////////////
-/// Jit Compile Info Input
-//////////////////////////////////////////////////////////////////////////
-struct JIT_COMPILE_INPUT
-{
- SWR_SHADER_TYPE type;
- uint32_t crc;
-
- const void* pIR; ///< Pointer to LLVM IR text.
- size_t irLength;
-
- bool enableJitSampler;
-
-};
-
-
-extern "C" {
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Create JIT context.
-HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char* arch, const char* core);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Destroy JIT context.
-void JITCALL JitDestroyContext(HANDLE hJitContext);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief JIT compile shader.
-/// @param hJitContext - Jit Context
-/// @param input - Input containing LLVM IR and other information
-/// @param output - Output containing information about JIT shader
-ShaderInfo* JITCALL JitCompileShader(HANDLE hJitContext, const JIT_COMPILE_INPUT& input);
-
-ShaderInfo* JITCALL JitGetShader(HANDLE hJitContext, const char* name);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief JIT destroy shader.
-/// @param hJitContext - Jit Context
-/// @param pShaderInfo - pointer to shader object.
-void JITCALL JitDestroyShader(HANDLE hJitContext, ShaderInfo*& pShaderInfo);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief JIT compiles fetch shader
-/// @param hJitContext - Jit Context
-/// @param state - Fetch state to build function from
-PFN_FETCH_FUNC JITCALL JitCompileFetch(HANDLE hJitContext, const FETCH_COMPILE_STATE& state);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief JIT compiles streamout shader
-/// @param hJitContext - Jit Context
-/// @param state - SO state to build function from
-PFN_SO_FUNC JITCALL JitCompileStreamout(HANDLE hJitContext, const STREAMOUT_COMPILE_STATE& state);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief JIT compiles blend shader
-/// @param hJitContext - Jit Context
-/// @param state - blend state to build function from
-PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitContext, const BLEND_COMPILE_STATE& state);
-
-}
-
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp b/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
deleted file mode 100644
index e54e23fc904..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
+++ /dev/null
@@ -1,183 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2017-2020 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file jit_pch.hpp
- *
- * @brief Pre-compiled header for jitter
- *
- * Notes:
- *
- ******************************************************************************/
-
-#pragma once
-
-#if defined(_MSC_VER)
-#pragma warning(disable : 4146 4244 4267 4800 4996)
-#endif
-
-#include <llvm/Config/llvm-config.h>
-
-#if LLVM_VERSION_MAJOR < 7
-// llvm 3.7+ reuses "DEBUG" as an enum value
-#pragma push_macro("DEBUG")
-#undef DEBUG
-#endif
-
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
-#if LLVM_VERSION_MAJOR >= 10
-#include "llvm/IR/IntrinsicsX86.h"
-#endif
-#include "llvm/ExecutionEngine/ObjectCache.h"
-
-#include "llvm/IR/Verifier.h"
-#include "llvm/ExecutionEngine/MCJIT.h"
-#include "llvm/Support/FileSystem.h"
-#define LLVM_F_NONE sys::fs::F_None
-
-#include "llvm/Analysis/Passes.h"
-
-#include "llvm/IR/LegacyPassManager.h"
-using FunctionPassManager = llvm::legacy::FunctionPassManager;
-using PassManager = llvm::legacy::PassManager;
-
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Scalar.h"
-#if LLVM_VERSION_MAJOR >= 7
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/InstCombine/InstCombine.h"
-#endif
-#include "llvm/Support/Host.h"
-#include "llvm/Support/DynamicLibrary.h"
-
-#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SCCIterator.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Analysis/LoopInfo.h"
-
-#include "llvm/Transforms/Utils/Cloning.h"
-
-#if defined(_WIN32)
-#include "llvm/ADT/Triple.h"
-#endif
-#include "llvm/IR/Function.h"
-
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SourceMgr.h"
-
-#include "llvm/Analysis/CFGPrinter.h"
-#include "llvm/IRReader/IRReader.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Config/llvm-config.h"
-
-#include "llvm/Bitcode/BitcodeWriter.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-
-#if LLVM_USE_INTEL_JITEVENTS
-#include "llvm/ExecutionEngine/JITEventListener.h"
-#endif
-
-#if LLVM_VERSION_MAJOR >= 5
-static const auto Sync_CrossThread = llvm::SyncScope::System;
-static const auto Attrib_FunctionIndex = llvm::AttributeList::FunctionIndex;
-static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext& ctx,
- const llvm::AttrBuilder& b)
-{
- return llvm::AttributeSet::get(ctx, b);
-}
-#else
-static const auto Sync_CrossThread = llvm::SynchronizationScope::CrossThread;
-static const auto Attrib_FunctionIndex = llvm::AttributeSet::FunctionIndex;
-static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext& ctx,
- const llvm::AttrBuilder& b)
-{
- return llvm::AttributeSet::get(ctx, Attrib_FunctionIndex, b);
-}
-#endif
-
-#if LLVM_VERSION_MAJOR >= 11
-static inline llvm::VectorType* getVectorType(llvm::Type *ElementType, unsigned NumElements)
-{
- return llvm::VectorType::get(ElementType, NumElements, false);
-}
-#else
-static inline llvm::VectorType* getVectorType(llvm::Type *ElementType, unsigned NumElements)
-{
- return llvm::VectorType::get(ElementType, NumElements);
-}
-#endif
-
-#if LLVM_VERSION_MAJOR < 7
-#pragma pop_macro("DEBUG")
-#endif
-
-#if LLVM_VERSION_MAJOR > 10
- typedef unsigned IntrinsicID;
- typedef llvm::Align AlignType;
-#else
- typedef llvm::Intrinsic::ID IntrinsicID;
- typedef unsigned AlignType;
-#endif
-
-#include <deque>
-#include <list>
-#include <unordered_map>
-#include <unordered_set>
-#include <iostream>
-#include <sstream>
-#include <type_traits>
-#include <cstdint>
-#include <vector>
-#include <tuple>
-#include <mutex>
-
-#include "common/os.h"
-
-#if defined(_WIN32)
-#define JIT_OBJ_EXT ".obj"
-#else
-#define JIT_OBJ_EXT ".o"
-#endif // _WIN32
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/meson.build b/src/gallium/drivers/swr/rasterizer/jitter/meson.build
deleted file mode 100644
index 295dc2fccb5..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/meson.build
+++ /dev/null
@@ -1,63 +0,0 @@
-# Copyright © 2017-2018 Intel Corporation
-
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-if dep_llvm.type_name() == 'internal'
- _irbuilder_h = subproject('llvm').get_variable('irbuilder_h')
-else
- _llvm_includedir = dep_llvm.get_variable(configtool : 'includedir', cmake : 'LLVM_INCLUDE_DIR')
- _irbuilder_h = join_paths(_llvm_includedir, 'llvm', 'IR', 'IRBuilder.h')
-endif
-
-gen_builder_hpp = custom_target(
- 'gen_builder.hpp',
- input : [
- swr_gen_llvm_ir_macros_py, _irbuilder_h,
- ],
- output : 'gen_builder.hpp',
- command : [
- prog_python, '@INPUT0@', '--input', '@INPUT1@', '--output', '@OUTPUT@',
- '--gen_h', '--output-dir', '@OUTDIR@'
- ],
- depend_files : swr_gen_builder_depends,
- build_by_default : true,
-)
-
-gen_builder_meta_hpp = custom_target(
- 'gen_builder_meta.hpp',
- input : '../codegen/gen_llvm_ir_macros.py',
- output : 'gen_builder_meta.hpp',
- command : [
- prog_python, '@INPUT0@', '--gen_meta_h', '--output', '@OUTPUT@',
- '--output-dir', '@OUTDIR@'
- ],
- depend_files : swr_gen_builder_depends,
-)
-
-gen_builder_intrin_hpp = custom_target(
- 'gen_builder_intrin.hpp',
- input : '../codegen/gen_llvm_ir_macros.py',
- output : 'gen_builder_intrin.hpp',
- command : [
- prog_python, '@INPUT0@', '--gen_intrin_h', '--output', '@OUTPUT@',
- '--output-dir', '@OUTDIR@'
- ],
- depend_files : swr_gen_builder_depends,
-)
-
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/DebugOutput.cpp b/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/DebugOutput.cpp
deleted file mode 100644
index 1c9db0c2d2a..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/DebugOutput.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file DebugOutput.cpp
- *
- * @brief Shader support library implementation for printed Debug output
- *
- * Notes:
- *
- ******************************************************************************/
-#include <stdarg.h>
-#include "common/os.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief called in JIT code, inserted by PRINT
-/// output to both stdout and visual studio debug console
-extern "C" void CallPrint(const char* fmt, ...)
-{
- va_list args;
- va_start(args, fmt);
- vprintf(fmt, args);
-
-#if defined(_WIN32)
- char strBuf[1024];
- vsnprintf_s(strBuf, _TRUNCATE, fmt, args);
- OutputDebugStringA(strBuf);
-#endif
-
- va_end(args);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/Scatter.cpp b/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/Scatter.cpp
deleted file mode 100644
index 925d57f5d47..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/Scatter.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file Scatter.cpp
- *
- * @brief Shader support library implementation for scatter emulation
- *
- * Notes:
- *
- ******************************************************************************/
-#include <stdarg.h>
-#include "common/os.h"
-#include "common/simdlib.hpp"
-
-extern "C" void ScatterPS_256(uint8_t* pBase, SIMD256::Integer vIndices, SIMD256::Float vSrc, uint8_t mask, uint32_t scale)
-{
- OSALIGN(float, 32) src[8];
- OSALIGN(uint32_t, 32) indices[8];
-
- SIMD256::store_ps(src, vSrc);
- SIMD256::store_si((SIMD256::Integer*)indices, vIndices);
-
- unsigned long index;
- while (_BitScanForward(&index, mask))
- {
- mask &= ~(1 << index);
-
- *(float*)(pBase + indices[index] * scale) = src[index];
- }
-}
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
deleted file mode 100644
index 72e1261a4b3..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
+++ /dev/null
@@ -1,379 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file streamout_jit.cpp
- *
- * @brief Implementation of the streamout jitter
- *
- * Notes:
- *
- ******************************************************************************/
-#include "jit_pch.hpp"
-#include "builder_gfx_mem.h"
-#include "jit_api.h"
-#include "streamout_jit.h"
-#include "gen_state_llvm.h"
-#include "functionpasses/passes.h"
-
-using namespace llvm;
-using namespace SwrJit;
-
-//////////////////////////////////////////////////////////////////////////
-/// Interface to Jitting a fetch shader
-//////////////////////////////////////////////////////////////////////////
-struct StreamOutJit : public BuilderGfxMem
-{
- StreamOutJit(JitManager* pJitMgr) : BuilderGfxMem(pJitMgr){};
-
- // returns pointer to SWR_STREAMOUT_BUFFER
- Value* getSOBuffer(Value* pSoCtx, uint32_t buffer)
- {
- return LOAD(pSoCtx, {0, SWR_STREAMOUT_CONTEXT_pBuffer, buffer});
- }
-
- //////////////////////////////////////////////////////////////////////////
- // @brief checks if streamout buffer is oob
- // @return <i1> true/false
- Value* oob(const STREAMOUT_COMPILE_STATE& state, Value* pSoCtx, uint32_t buffer)
- {
- Value* returnMask = C(false);
-
- Value* pBuf = getSOBuffer(pSoCtx, buffer);
-
- // load enable
- // @todo bool data types should generate <i1> llvm type
- Value* enabled = TRUNC(LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_enable}), IRB()->getInt1Ty());
-
- // load buffer size
- Value* bufferSize = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_bufferSize});
-
- // load current streamOffset
- Value* streamOffset = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
-
- // load buffer pitch
- Value* pitch = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pitch});
-
- // buffer is considered oob if in use in a decl but not enabled
- returnMask = OR(returnMask, NOT(enabled));
-
- // buffer is oob if cannot fit a prims worth of verts
- Value* newOffset = ADD(streamOffset, MUL(pitch, C(state.numVertsPerPrim)));
- returnMask = OR(returnMask, ICMP_SGT(newOffset, bufferSize));
-
- return returnMask;
- }
-
- //////////////////////////////////////////////////////////////////////////
- // @brief converts scalar bitmask to <4 x i32> suitable for shuffle vector,
- // packing the active mask bits
- // ex. bitmask 0011 -> (0, 1, 0, 0)
- // bitmask 1000 -> (3, 0, 0, 0)
- // bitmask 1100 -> (2, 3, 0, 0)
- Value* PackMask(uint32_t bitmask)
- {
- std::vector<Constant*> indices(4, C(0));
- unsigned long index;
- uint32_t elem = 0;
- while (_BitScanForward(&index, bitmask))
- {
- indices[elem++] = C((int)index);
- bitmask &= ~(1 << index);
- }
-
- return ConstantVector::get(indices);
- }
-
- //////////////////////////////////////////////////////////////////////////
- // @brief convert scalar bitmask to <4xfloat> bitmask
- Value* ToMask(uint32_t bitmask)
- {
- std::vector<Constant*> indices;
- for (uint32_t i = 0; i < 4; ++i)
- {
- if (bitmask & (1 << i))
- {
- indices.push_back(C(true));
- }
- else
- {
- indices.push_back(C(false));
- }
- }
- return ConstantVector::get(indices);
- }
-
- //////////////////////////////////////////////////////////////////////////
- // @brief processes a single decl from the streamout stream. Reads 4 components from the input
- // stream and writes N components to the output buffer given the componentMask or if
- // a hole, just increments the buffer pointer
- // @param pStream - pointer to current attribute
- // @param pOutBuffers - pointers to the current location of each output buffer
- // @param decl - input decl
- void buildDecl(Value* pStream, Value* pOutBuffers[4], const STREAMOUT_DECL& decl)
- {
- uint32_t numComponents = _mm_popcnt_u32(decl.componentMask);
- uint32_t packedMask = (1 << numComponents) - 1;
- if (!decl.hole)
- {
- // increment stream pointer to correct slot
- Value* pAttrib = GEP(pStream, C(4 * decl.attribSlot));
-
- // load 4 components from stream
- Type* simd4Ty = getVectorType(IRB()->getFloatTy(), 4);
- Type* simd4PtrTy = PointerType::get(simd4Ty, 0);
- pAttrib = BITCAST(pAttrib, simd4PtrTy);
- Value* vattrib = LOAD(pAttrib);
-
- // shuffle/pack enabled components
- Value* vpackedAttrib = VSHUFFLE(vattrib, vattrib, PackMask(decl.componentMask));
-
- // store to output buffer
- // cast SO buffer to i8*, needed by maskstore
- Value* pOut = BITCAST(pOutBuffers[decl.bufferIndex], PointerType::get(simd4Ty, 0));
-
- // cast input to <4xfloat>
- Value* src = BITCAST(vpackedAttrib, simd4Ty);
-
- // cast mask to <4xi1>
- Value* mask = ToMask(packedMask);
- MASKED_STORE(src, pOut, 4, mask, PointerType::get(simd4Ty, 0), MEM_CLIENT::GFX_MEM_CLIENT_STREAMOUT);
- }
-
- // increment SO buffer
- pOutBuffers[decl.bufferIndex] = GEP(pOutBuffers[decl.bufferIndex], C(numComponents));
- }
-
- //////////////////////////////////////////////////////////////////////////
- // @brief builds a single vertex worth of data for the given stream
- // @param streamState - state for this stream
- // @param pCurVertex - pointer to src stream vertex data
- // @param pOutBuffer - pointers to up to 4 SO buffers
- void buildVertex(const STREAMOUT_STREAM& streamState, Value* pCurVertex, Value* pOutBuffer[4])
- {
- for (uint32_t d = 0; d < streamState.numDecls; ++d)
- {
- const STREAMOUT_DECL& decl = streamState.decl[d];
- buildDecl(pCurVertex, pOutBuffer, decl);
- }
- }
-
- void buildStream(const STREAMOUT_COMPILE_STATE& state,
- const STREAMOUT_STREAM& streamState,
- Value* pSoCtx,
- BasicBlock* returnBB,
- Function* soFunc)
- {
- // get list of active SO buffers
- std::unordered_set<uint32_t> activeSOBuffers;
- for (uint32_t d = 0; d < streamState.numDecls; ++d)
- {
- const STREAMOUT_DECL& decl = streamState.decl[d];
- activeSOBuffers.insert(decl.bufferIndex);
- }
-
- // always increment numPrimStorageNeeded
- Value* numPrimStorageNeeded = LOAD(pSoCtx, {0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded});
- numPrimStorageNeeded = ADD(numPrimStorageNeeded, C(1));
- STORE(numPrimStorageNeeded, pSoCtx, {0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded});
-
- // check OOB on active SO buffers. If any buffer is out of bound, don't write
- // the primitive to any buffer
- Value* oobMask = C(false);
- for (uint32_t buffer : activeSOBuffers)
- {
- oobMask = OR(oobMask, oob(state, pSoCtx, buffer));
- }
-
- BasicBlock* validBB = BasicBlock::Create(JM()->mContext, "valid", soFunc);
-
- // early out if OOB
- COND_BR(oobMask, returnBB, validBB);
-
- IRB()->SetInsertPoint(validBB);
-
- Value* numPrimsWritten = LOAD(pSoCtx, {0, SWR_STREAMOUT_CONTEXT_numPrimsWritten});
- numPrimsWritten = ADD(numPrimsWritten, C(1));
- STORE(numPrimsWritten, pSoCtx, {0, SWR_STREAMOUT_CONTEXT_numPrimsWritten});
-
- // compute start pointer for each output buffer
- Value* pOutBuffer[4];
- Value* pOutBufferStartVertex[4];
- Value* outBufferPitch[4];
- for (uint32_t b : activeSOBuffers)
- {
- Value* pBuf = getSOBuffer(pSoCtx, b);
- Value* pData = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pBuffer});
- Value* streamOffset = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
- pOutBuffer[b] = GEP(pData, streamOffset, PointerType::get(IRB()->getInt32Ty(), 0));
- pOutBufferStartVertex[b] = pOutBuffer[b];
-
- outBufferPitch[b] = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pitch});
- }
-
- // loop over the vertices of the prim
- Value* pStreamData = LOAD(pSoCtx, {0, SWR_STREAMOUT_CONTEXT_pPrimData});
- for (uint32_t v = 0; v < state.numVertsPerPrim; ++v)
- {
- buildVertex(streamState, pStreamData, pOutBuffer);
-
- // increment stream and output buffer pointers
- // stream verts are always 32*4 dwords apart
- pStreamData = GEP(pStreamData, C(SWR_VTX_NUM_SLOTS * 4));
-
- // output buffers offset using pitch in buffer state
- for (uint32_t b : activeSOBuffers)
- {
- pOutBufferStartVertex[b] = GEP(pOutBufferStartVertex[b], outBufferPitch[b]);
- pOutBuffer[b] = pOutBufferStartVertex[b];
- }
- }
-
- // update each active buffer's streamOffset
- for (uint32_t b : activeSOBuffers)
- {
- Value* pBuf = getSOBuffer(pSoCtx, b);
- Value* streamOffset = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
- streamOffset = ADD(streamOffset, MUL(C(state.numVertsPerPrim), outBufferPitch[b]));
- STORE(streamOffset, pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
- }
- }
-
- Function* Create(const STREAMOUT_COMPILE_STATE& state)
- {
- std::stringstream fnName("SO_",
- std::ios_base::in | std::ios_base::out | std::ios_base::ate);
- fnName << ComputeCRC(0, &state, sizeof(state));
-
- std::vector<Type*> args{
- mInt8PtrTy,
- mInt8PtrTy,
- PointerType::get(Gen_SWR_STREAMOUT_CONTEXT(JM()), 0), // SWR_STREAMOUT_CONTEXT*
- };
-
- FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
- Function* soFunc = Function::Create(
- fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
-
- soFunc->getParent()->setModuleIdentifier(soFunc->getName());
-
- // create return basic block
- BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", soFunc);
- BasicBlock* returnBB = BasicBlock::Create(JM()->mContext, "return", soFunc);
-
- IRB()->SetInsertPoint(entry);
-
- // arguments
- auto argitr = soFunc->arg_begin();
-
- Value* privateContext = &*argitr++;
- privateContext->setName("privateContext");
- SetPrivateContext(privateContext);
-
- mpWorkerData = &*argitr;
- ++argitr;
- mpWorkerData->setName("pWorkerData");
-
- Value* pSoCtx = &*argitr++;
- pSoCtx->setName("pSoCtx");
-
- const STREAMOUT_STREAM& streamState = state.stream;
- buildStream(state, streamState, pSoCtx, returnBB, soFunc);
-
- BR(returnBB);
-
- IRB()->SetInsertPoint(returnBB);
- RET_VOID();
-
- JitManager::DumpToFile(soFunc, "SoFunc");
-
- ::FunctionPassManager passes(JM()->mpCurrentModule);
-
- passes.add(createBreakCriticalEdgesPass());
- passes.add(createCFGSimplificationPass());
- passes.add(createEarlyCSEPass());
- passes.add(createPromoteMemoryToRegisterPass());
- passes.add(createCFGSimplificationPass());
- passes.add(createEarlyCSEPass());
- passes.add(createInstructionCombiningPass());
-#if LLVM_VERSION_MAJOR <= 11
- passes.add(createConstantPropagationPass());
-#endif
- passes.add(createSCCPPass());
- passes.add(createAggressiveDCEPass());
-
- passes.add(createLowerX86Pass(this));
-
- passes.run(*soFunc);
-
- JitManager::DumpToFile(soFunc, "SoFunc_optimized");
-
-
- return soFunc;
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief JITs from streamout shader IR
-/// @param hJitMgr - JitManager handle
-/// @param func - LLVM function IR
-/// @return PFN_SO_FUNC - pointer to SOS function
-PFN_SO_FUNC JitStreamoutFunc(HANDLE hJitMgr, const HANDLE hFunc)
-{
- llvm::Function* func = (llvm::Function*)hFunc;
- JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
- PFN_SO_FUNC pfnStreamOut;
- pfnStreamOut = (PFN_SO_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str()));
- // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot
- // add new IR to the module
- pJitMgr->mIsModuleFinalized = true;
-
- pJitMgr->DumpAsm(func, "SoFunc_optimized");
-
-
- return pfnStreamOut;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief JIT compiles streamout shader
-/// @param hJitMgr - JitManager handle
-/// @param state - SO state to build function from
-extern "C" PFN_SO_FUNC JITCALL JitCompileStreamout(HANDLE hJitMgr,
- const STREAMOUT_COMPILE_STATE& state)
-{
- JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
-
- STREAMOUT_COMPILE_STATE soState = state;
- if (soState.offsetAttribs)
- {
- for (uint32_t i = 0; i < soState.stream.numDecls; ++i)
- {
- soState.stream.decl[i].attribSlot -= soState.offsetAttribs;
- }
- }
-
- pJitMgr->SetupNewModule();
-
- StreamOutJit theJit(pJitMgr);
- HANDLE hFunc = theJit.Create(soState);
-
- return JitStreamoutFunc(hJitMgr, hFunc);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.h
deleted file mode 100644
index d76fcdd5742..00000000000
--- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * @file streamout_jit.h
- *
- * @brief Definition of the streamout jitter
- *
- * Notes:
- *
- ******************************************************************************/
-#pragma once
-
-#include "common/formats.h"
-#include "core/state.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// STREAMOUT_DECL - Stream decl
-//////////////////////////////////////////////////////////////////////////
-struct STREAMOUT_DECL
-{
- // Buffer that stream maps to.
- DWORD bufferIndex;
-
- // attribute to stream
- uint32_t attribSlot;
-
- // attribute component mask
- uint32_t componentMask;
-
- // indicates this decl is a hole
- bool hole;
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// STREAMOUT_STREAM - Stream decls
-//////////////////////////////////////////////////////////////////////////
-struct STREAMOUT_STREAM
-{
- // number of decls for this stream
- uint32_t numDecls;
-
- // array of numDecls decls
- STREAMOUT_DECL decl[128];
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// State required for streamout jit
-//////////////////////////////////////////////////////////////////////////
-struct STREAMOUT_COMPILE_STATE
-{
- // number of verts per primitive
- uint32_t numVertsPerPrim;
- uint32_t
- offsetAttribs; ///< attrib offset to subtract from all STREAMOUT_DECL::attribSlot values.
-
- uint64_t streamMask;
-
- // stream decls
- STREAMOUT_STREAM stream;
-
- bool operator==(const STREAMOUT_COMPILE_STATE& other) const
- {
- if (numVertsPerPrim != other.numVertsPerPrim)
- return false;
- if (stream.numDecls != other.stream.numDecls)
- return false;
-
- for (uint32_t i = 0; i < stream.numDecls; ++i)
- {
- if (stream.decl[i].bufferIndex != other.stream.decl[i].bufferIndex)
- return false;
- if (stream.decl[i].attribSlot != other.stream.decl[i].attribSlot)
- return false;
- if (stream.decl[i].componentMask != other.stream.decl[i].componentMask)
- return false;
- if (stream.decl[i].hole != other.stream.decl[i].hole)
- return false;
- }
-
- return true;
- }
-};
diff --git a/src/gallium/drivers/swr/rasterizer/memory/ClearTile.cpp b/src/gallium/drivers/swr/rasterizer/memory/ClearTile.cpp
deleted file mode 100644
index 6a528b6a0f2..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/ClearTile.cpp
+++ /dev/null
@@ -1,305 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ClearTile.cpp
-*
-* @brief Functionality for ClearTile. StoreHotTileClear clears a single macro
-* tile in the destination.
-*
-******************************************************************************/
-#include "common/os.h"
-#include "core/context.h"
-#include "common/formats.h"
-#include "memory/TilingFunctions.h"
-#include "memory/tilingtraits.h"
-#include "memory/Convert.h"
-
-typedef void(*PFN_STORE_TILES_CLEAR)(const float*, SWR_SURFACE_STATE*, UINT, UINT, uint32_t);
-
-//////////////////////////////////////////////////////////////////////////
-/// Clear Raster Tile Function Tables.
-//////////////////////////////////////////////////////////////////////////
-static PFN_STORE_TILES_CLEAR sStoreTilesClearColorTable[NUM_SWR_FORMATS];
-
-static PFN_STORE_TILES_CLEAR sStoreTilesClearDepthTable[NUM_SWR_FORMATS];
-
-//////////////////////////////////////////////////////////////////////////
-/// StoreRasterTileClear
-//////////////////////////////////////////////////////////////////////////
-template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct StoreRasterTileClear
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Stores an 8x8 raster tile to the destination surface.
- /// @param pColor - Pointer to clear color.
- /// @param pDstSurface - Destination surface state
- /// @param x, y - Coordinates to raster tile.
- INLINE static void StoreClear(
- const uint8_t* dstFormattedColor,
- UINT dstBytesPerPixel,
- SWR_SURFACE_STATE* pDstSurface,
- UINT x, UINT y, // (x, y) pixel coordinate to start of raster tile.
- uint32_t renderTargetArrayIndex)
- {
- // If we're outside of the surface, stop.
- uint32_t lodWidth = std::max<uint32_t>(pDstSurface->width >> pDstSurface->lod, 1U);
- uint32_t lodHeight = std::max<uint32_t>(pDstSurface->height >> pDstSurface->lod, 1U);
- if (x >= lodWidth || y >= lodHeight)
- return;
-
- // Compute destination address for raster tile.
- uint8_t* pDstTile = (uint8_t*)ComputeSurfaceAddress<false, false>(
- x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,
- pDstSurface->arrayIndex + renderTargetArrayIndex,
- 0, // sampleNum
- pDstSurface->lod,
- pDstSurface);
-
- // start of first row
- uint8_t* pDst = pDstTile;
- UINT dstBytesPerRow = 0;
-
- // For each raster tile pixel in row 0 (rx, 0)
- for (UINT rx = 0; (rx < KNOB_TILE_X_DIM) && ((x + rx) < lodWidth); ++rx)
- {
- memcpy(pDst, dstFormattedColor, dstBytesPerPixel);
-
- // Increment pointer to next pixel in row.
- pDst += dstBytesPerPixel;
- dstBytesPerRow += dstBytesPerPixel;
- }
-
- // start of second row
- pDst = pDstTile + pDstSurface->pitch;
-
- // For each remaining row in the rest of the raster tile
- for (UINT ry = 1; (ry < KNOB_TILE_Y_DIM) && ((y + ry) < lodHeight); ++ry)
- {
- // copy row
- memcpy(pDst, pDstTile, dstBytesPerRow);
-
- // Increment pointer to first pixel in next row.
- pDst += pDstSurface->pitch;
- }
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// StoreMacroTileClear - Stores a macro tile clear to its raster tiles.
-//////////////////////////////////////////////////////////////////////////
-template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct StoreMacroTileClear
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Stores a macrotile to the destination surface.
- /// @param pColor - Pointer to color to write to pixels.
- /// @param pDstSurface - Destination surface state
- /// @param x, y - Coordinates to macro tile
- static void StoreClear(
- const float *pColor,
- SWR_SURFACE_STATE* pDstSurface,
- UINT x, UINT y, uint32_t renderTargetArrayIndex)
- {
- UINT dstBytesPerPixel = (FormatTraits<DstFormat>::bpp / 8);
-
- uint8_t dstFormattedColor[16]; // max bpp is 128, so 16 is all we need here for one pixel
-
- float srcColor[4];
-
- for (UINT comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
- {
- srcColor[comp] = pColor[FormatTraits<DstFormat>::swizzle(comp)];
- }
-
- // using this helper function, but the Tiling Traits is unused inside it so just using a dummy value
- ConvertPixelFromFloat<DstFormat>(dstFormattedColor, srcColor);
-
- // Store each raster tile from the hot tile to the destination surface.
- // TODO: Put in check for partial coverage on x/y -- SWR_ASSERT if it happens.
- // Intent is for this function to only handle full tiles.
- for (UINT row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
- {
- for (UINT col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
- {
- StoreRasterTileClear<SrcFormat, DstFormat>::StoreClear(dstFormattedColor, dstBytesPerPixel, pDstSurface, (x + col), (y + row), renderTargetArrayIndex);
- }
- }
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Writes clear color to every pixel of a render surface
-/// @param hPrivateContext - Handle to private DC
-/// @param renderTargetIndex - Index to destination render target
-/// @param x, y - Coordinates to raster tile.
-/// @param pClearColor - Pointer to clear color
-void SwrStoreHotTileClear(
- HANDLE hWorkerPrivateData,
- SWR_SURFACE_STATE *pDstSurface,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- UINT x,
- UINT y,
- uint32_t renderTargetArrayIndex,
- const float* pClearColor)
-{
- PFN_STORE_TILES_CLEAR pfnStoreTilesClear = NULL;
-
- if (renderTargetIndex == SWR_ATTACHMENT_STENCIL)
- {
- SWR_ASSERT(pDstSurface->format == R8_UINT);
- pfnStoreTilesClear = StoreMacroTileClear<R8_UINT, R8_UINT>::StoreClear;
- }
- else if (renderTargetIndex == SWR_ATTACHMENT_DEPTH)
- {
- pfnStoreTilesClear = sStoreTilesClearDepthTable[pDstSurface->format];
- }
- else
- {
- pfnStoreTilesClear = sStoreTilesClearColorTable[pDstSurface->format];
- }
-
- SWR_ASSERT(pfnStoreTilesClear != NULL);
-
- // Store a macro tile.
- /// @todo Once all formats are supported then if check can go away. This is to help us near term to make progress.
- if (pfnStoreTilesClear != NULL)
- {
- pfnStoreTilesClear(pClearColor, pDstSurface, x, y, renderTargetArrayIndex);
- }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// INIT_STORE_TILES_TABLE - Helper macro for setting up the tables.
-#define INIT_STORE_TILES_CLEAR_COLOR_TABLE() \
- memset(sStoreTilesClearColorTable, 0, sizeof(sStoreTilesClearColorTable)); \
- \
- sStoreTilesClearColorTable[R32G32B32A32_FLOAT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R32G32B32A32_FLOAT>::StoreClear; \
- sStoreTilesClearColorTable[R32G32B32A32_SINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R32G32B32A32_SINT>::StoreClear; \
- sStoreTilesClearColorTable[R32G32B32A32_UINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R32G32B32A32_UINT>::StoreClear; \
- sStoreTilesClearColorTable[R32G32B32X32_FLOAT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R32G32B32X32_FLOAT>::StoreClear; \
- sStoreTilesClearColorTable[R32G32B32_FLOAT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R32G32B32_FLOAT>::StoreClear; \
- sStoreTilesClearColorTable[R32G32B32_SINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R32G32B32_SINT>::StoreClear; \
- sStoreTilesClearColorTable[R32G32B32_UINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R32G32B32_UINT>::StoreClear; \
- sStoreTilesClearColorTable[R16G16B16A16_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16B16A16_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[R16G16B16A16_SNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16B16A16_SNORM>::StoreClear; \
- sStoreTilesClearColorTable[R16G16B16A16_SINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16B16A16_SINT>::StoreClear; \
- sStoreTilesClearColorTable[R16G16B16A16_UINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16B16A16_UINT>::StoreClear; \
- sStoreTilesClearColorTable[R16G16B16A16_FLOAT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16B16A16_FLOAT>::StoreClear; \
- sStoreTilesClearColorTable[R32G32_FLOAT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R32G32_FLOAT>::StoreClear; \
- sStoreTilesClearColorTable[R32G32_SINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R32G32_SINT>::StoreClear; \
- sStoreTilesClearColorTable[R32G32_UINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R32G32_UINT>::StoreClear; \
- sStoreTilesClearColorTable[R16G16B16X16_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16B16X16_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[R16G16B16X16_FLOAT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16B16X16_FLOAT>::StoreClear; \
- sStoreTilesClearColorTable[B8G8R8A8_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, B8G8R8A8_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[B8G8R8A8_UNORM_SRGB] = StoreMacroTileClear<R32G32B32A32_FLOAT, B8G8R8A8_UNORM_SRGB>::StoreClear; \
- sStoreTilesClearColorTable[R10G10B10A2_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R10G10B10A2_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[R10G10B10A2_UNORM_SRGB] = StoreMacroTileClear<R32G32B32A32_FLOAT, R10G10B10A2_UNORM_SRGB>::StoreClear; \
- sStoreTilesClearColorTable[R10G10B10A2_UINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R10G10B10A2_UINT>::StoreClear; \
- sStoreTilesClearColorTable[R8G8B8A8_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8B8A8_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[R8G8B8A8_UNORM_SRGB] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8B8A8_UNORM_SRGB>::StoreClear; \
- sStoreTilesClearColorTable[R8G8B8A8_SNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8B8A8_SNORM>::StoreClear; \
- sStoreTilesClearColorTable[R8G8B8A8_SINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8B8A8_SINT>::StoreClear; \
- sStoreTilesClearColorTable[R8G8B8A8_UINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8B8A8_UINT>::StoreClear; \
- sStoreTilesClearColorTable[R16G16_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[R16G16_SNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16_SNORM>::StoreClear; \
- sStoreTilesClearColorTable[R16G16_SINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16_SINT>::StoreClear; \
- sStoreTilesClearColorTable[R16G16_UINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16_UINT>::StoreClear; \
- sStoreTilesClearColorTable[R16G16_FLOAT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16_FLOAT>::StoreClear; \
- sStoreTilesClearColorTable[B10G10R10A2_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, B10G10R10A2_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[B10G10R10A2_UNORM_SRGB] = StoreMacroTileClear<R32G32B32A32_FLOAT, B10G10R10A2_UNORM_SRGB>::StoreClear; \
- sStoreTilesClearColorTable[R11G11B10_FLOAT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R11G11B10_FLOAT>::StoreClear; \
- sStoreTilesClearColorTable[R32_SINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R32_SINT>::StoreClear; \
- sStoreTilesClearColorTable[R32_UINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R32_UINT>::StoreClear; \
- sStoreTilesClearColorTable[R32_FLOAT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R32_FLOAT>::StoreClear; \
- sStoreTilesClearColorTable[A32_FLOAT] = StoreMacroTileClear<R32G32B32A32_FLOAT, A32_FLOAT>::StoreClear; \
- sStoreTilesClearColorTable[B8G8R8X8_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, B8G8R8X8_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[B8G8R8X8_UNORM_SRGB] = StoreMacroTileClear<R32G32B32A32_FLOAT, B8G8R8X8_UNORM_SRGB>::StoreClear; \
- sStoreTilesClearColorTable[R8G8B8X8_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8B8X8_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[R8G8B8X8_UNORM_SRGB] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8B8X8_UNORM_SRGB>::StoreClear; \
- sStoreTilesClearColorTable[B10G10R10X2_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, B10G10R10X2_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[B5G6R5_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, B5G6R5_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[B5G6R5_UNORM_SRGB] = StoreMacroTileClear<R32G32B32A32_FLOAT, B5G6R5_UNORM_SRGB>::StoreClear; \
- sStoreTilesClearColorTable[B5G5R5A1_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, B5G5R5A1_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[B5G5R5A1_UNORM_SRGB] = StoreMacroTileClear<R32G32B32A32_FLOAT, B5G5R5A1_UNORM_SRGB>::StoreClear; \
- sStoreTilesClearColorTable[B4G4R4A4_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, B4G4R4A4_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[B4G4R4A4_UNORM_SRGB] = StoreMacroTileClear<R32G32B32A32_FLOAT, B4G4R4A4_UNORM_SRGB>::StoreClear; \
- sStoreTilesClearColorTable[R8G8_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[R8G8_SNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8_SNORM>::StoreClear; \
- sStoreTilesClearColorTable[R8G8_SINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8_SINT>::StoreClear; \
- sStoreTilesClearColorTable[R8G8_UINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8_UINT>::StoreClear; \
- sStoreTilesClearColorTable[R16_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[R16_SNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16_SNORM>::StoreClear; \
- sStoreTilesClearColorTable[R16_SINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16_SINT>::StoreClear; \
- sStoreTilesClearColorTable[R16_UINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16_UINT>::StoreClear; \
- sStoreTilesClearColorTable[R16_FLOAT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16_FLOAT>::StoreClear; \
- sStoreTilesClearColorTable[A16_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, A16_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[A16_FLOAT] = StoreMacroTileClear<R32G32B32A32_FLOAT, A16_FLOAT>::StoreClear; \
- sStoreTilesClearColorTable[B5G5R5X1_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, B5G5R5X1_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[B5G5R5X1_UNORM_SRGB] = StoreMacroTileClear<R32G32B32A32_FLOAT, B5G5R5X1_UNORM_SRGB>::StoreClear; \
- sStoreTilesClearColorTable[R8_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[R8_SNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8_SNORM>::StoreClear; \
- sStoreTilesClearColorTable[R8_SINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8_SINT>::StoreClear; \
- sStoreTilesClearColorTable[R8_UINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8_UINT>::StoreClear; \
- sStoreTilesClearColorTable[A8_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, A8_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[BC1_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, BC1_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[BC2_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, BC2_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[BC3_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, BC3_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[BC4_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, BC4_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[BC5_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, BC5_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[BC1_UNORM_SRGB] = StoreMacroTileClear<R32G32B32A32_FLOAT, BC1_UNORM_SRGB>::StoreClear; \
- sStoreTilesClearColorTable[BC2_UNORM_SRGB] = StoreMacroTileClear<R32G32B32A32_FLOAT, BC2_UNORM_SRGB>::StoreClear; \
- sStoreTilesClearColorTable[BC3_UNORM_SRGB] = StoreMacroTileClear<R32G32B32A32_FLOAT, BC3_UNORM_SRGB>::StoreClear; \
- sStoreTilesClearColorTable[R8G8B8_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8B8_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[R8G8B8_SNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8B8_SNORM>::StoreClear; \
- sStoreTilesClearColorTable[BC4_SNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, BC4_SNORM>::StoreClear; \
- sStoreTilesClearColorTable[BC5_SNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, BC5_SNORM>::StoreClear; \
- sStoreTilesClearColorTable[R16G16B16_FLOAT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16B16_FLOAT>::StoreClear; \
- sStoreTilesClearColorTable[R16G16B16_UNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16B16_UNORM>::StoreClear; \
- sStoreTilesClearColorTable[R16G16B16_SNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16B16_SNORM>::StoreClear; \
- sStoreTilesClearColorTable[R8G8B8_UNORM_SRGB] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8B8_UNORM_SRGB>::StoreClear; \
- sStoreTilesClearColorTable[R16G16B16_UINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16B16_UINT>::StoreClear; \
- sStoreTilesClearColorTable[R16G16B16_SINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R16G16B16_SINT>::StoreClear; \
- sStoreTilesClearColorTable[R10G10B10A2_SNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, R10G10B10A2_SNORM>::StoreClear; \
- sStoreTilesClearColorTable[R10G10B10A2_SINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R10G10B10A2_SINT>::StoreClear; \
- sStoreTilesClearColorTable[B10G10R10A2_SNORM] = StoreMacroTileClear<R32G32B32A32_FLOAT, B10G10R10A2_SNORM>::StoreClear; \
- sStoreTilesClearColorTable[B10G10R10A2_UINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, B10G10R10A2_UINT>::StoreClear; \
- sStoreTilesClearColorTable[B10G10R10A2_SINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, B10G10R10A2_SINT>::StoreClear; \
- sStoreTilesClearColorTable[R8G8B8_UINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8B8_UINT>::StoreClear; \
- sStoreTilesClearColorTable[R8G8B8_SINT] = StoreMacroTileClear<R32G32B32A32_FLOAT, R8G8B8_SINT>::StoreClear;
-
-//////////////////////////////////////////////////////////////////////////
-/// INIT_STORE_TILES_TABLE - Helper macro for setting up the tables.
-#define INIT_STORE_TILES_CLEAR_DEPTH_TABLE() \
- memset(sStoreTilesClearDepthTable, 0, sizeof(sStoreTilesClearDepthTable)); \
- \
- sStoreTilesClearDepthTable[R32_FLOAT] = StoreMacroTileClear<R32_FLOAT, R32_FLOAT>::StoreClear; \
- sStoreTilesClearDepthTable[R32_FLOAT_X8X24_TYPELESS] = StoreMacroTileClear<R32_FLOAT, R32_FLOAT_X8X24_TYPELESS>::StoreClear; \
- sStoreTilesClearDepthTable[R24_UNORM_X8_TYPELESS] = StoreMacroTileClear<R32_FLOAT, R24_UNORM_X8_TYPELESS>::StoreClear; \
- sStoreTilesClearDepthTable[R16_UNORM] = StoreMacroTileClear<R32_FLOAT, R16_UNORM>::StoreClear;
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Sets up tables for ClearTile
-void InitSimClearTilesTable()
-{
- INIT_STORE_TILES_CLEAR_COLOR_TABLE();
- INIT_STORE_TILES_CLEAR_DEPTH_TABLE();
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/Convert.h b/src/gallium/drivers/swr/rasterizer/memory/Convert.h
deleted file mode 100644
index c8c6b30daff..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/Convert.h
+++ /dev/null
@@ -1,730 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file Convert.h
-*
-* @brief Conversion utility functions
-*
-******************************************************************************/
-#pragma once
-
-#if defined(_MSC_VER)
-// disable "potential divide by 0"
-#pragma warning(disable: 4723)
-#endif
-
-#include <cmath>
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Convert an IEEE 754 16-bit float to an 32-bit single precision
-/// float
-/// @param val - 16-bit float
-/// @todo Maybe move this outside of this file into a header?
-static INLINE float ConvertSmallFloatTo32(UINT val)
-{
- UINT result;
- if ((val & 0x7fff) == 0)
- {
- result = ((uint32_t)(val & 0x8000)) << 16;
- }
- else if ((val & 0x7c00) == 0x7c00)
- {
- result = ((val & 0x3ff) == 0) ? 0x7f800000 : 0x7fc00000;
- result |= ((uint32_t)val & 0x8000) << 16;
- }
- else
- {
- uint32_t sign = (val & 0x8000) << 16;
- uint32_t mant = (val & 0x3ff) << 13;
- uint32_t exp = (val >> 10) & 0x1f;
- if ((exp == 0) && (mant != 0)) // Adjust exponent and mantissa for denormals
- {
- mant <<= 1;
- while (mant < (0x400 << 13))
- {
- exp--;
- mant <<= 1;
- }
- mant &= (0x3ff << 13);
- }
- exp = ((exp - 15 + 127) & 0xff) << 23;
- result = sign | exp | mant;
- }
-
- return *(float*)&result;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Convert an IEEE 754 32-bit single precision float to an
-/// unsigned small float with 5 exponent bits and a variable
-/// number of mantissa bits.
-/// @param val - 32-bit float
-/// @todo Maybe move this outside of this file into a header?
-template<UINT numMantissaBits>
-static UINT Convert32ToSmallFloat(float val)
-{
- uint32_t sign, exp, mant;
- uint32_t roundBits;
-
- // Extract the sign, exponent, and mantissa
- UINT uf = *(UINT*)&val;
-
- sign = (uf & 0x80000000) >> 31;
- exp = (uf & 0x7F800000) >> 23;
- mant = uf & 0x007FFFFF;
-
- // 10/11 bit floats are unsigned. Negative values are clamped to 0.
- if (sign != 0)
- {
- exp = mant = 0;
- }
- // Check for out of range
- else if ((exp == 0xFF) && (mant != 0)) // NaN
- {
- exp = 0x1F;
- mant = 1 << numMantissaBits;
- }
- else if ((exp == 0xFF) && (mant == 0)) // INF
- {
- exp = 0x1F;
- mant = 0;
- }
- else if (exp > (0x70 + 0x1E)) // Too big to represent
- {
- exp = 0x1Eu;
- mant = (1 << numMantissaBits) - 1; // 0x3F for 6 bit mantissa.
- }
- else if ((exp <= 0x70) && (exp >= 0x66)) // It's a denorm
- {
- mant |= 0x00800000;
- for (; exp <= 0x70; mant >>= 1, exp++)
- ;
- exp = 0;
- mant = mant >> (23 - numMantissaBits);
- }
- else if (exp < 0x66) // Too small to represent -> Zero
- {
- exp = 0;
- mant = 0;
- }
- else
- {
- // Saves bits that will be shifted off for rounding
- roundBits = mant & 0x1FFFu;
- // convert exponent and mantissa to 16 bit format
- exp = exp - 0x70u;
- mant = mant >> (23 - numMantissaBits);
-
- // Essentially RTZ, but round up if off by only 1 lsb
- if (roundBits == 0x1FFFu)
- {
- mant++;
- // check for overflow
- if ((mant & (0x3 << numMantissaBits)) != 0) // 0x60 = 0x3 << (num Mantissa Bits)
- exp++;
- // make sure only the needed bits are used
- mant &= (1 << numMantissaBits) - 1;
- }
- }
-
- UINT tmpVal = (exp << numMantissaBits) | mant;
- return tmpVal;
-}
-
-#if KNOB_ARCH == KNOB_ARCH_AVX
-//////////////////////////////////////////////////////////////////////////
-/// @brief Convert an IEEE 754 32-bit single precision float to an
-/// 16 bit float with 5 exponent bits and a variable
-/// number of mantissa bits.
-/// @param val - 32-bit float
-/// @todo Maybe move this outside of this file into a header?
-static uint16_t Convert32To16Float(float val)
-{
- uint32_t sign, exp, mant;
- uint32_t roundBits;
-
- // Extract the sign, exponent, and mantissa
- uint32_t uf = *(uint32_t*)&val;
- sign = (uf & 0x80000000) >> 31;
- exp = (uf & 0x7F800000) >> 23;
- mant = uf & 0x007FFFFF;
-
- // Check for out of range
- if (std::isnan(val))
- {
- exp = 0x1F;
- mant = 0x200;
- sign = 1; // set the sign bit for NANs
- }
- else if (std::isinf(val))
- {
- exp = 0x1f;
- mant = 0x0;
- }
- else if (exp > (0x70 + 0x1E)) // Too big to represent -> max representable value
- {
- exp = 0x1E;
- mant = 0x3FF;
- }
- else if ((exp <= 0x70) && (exp >= 0x66)) // It's a denorm
- {
- mant |= 0x00800000;
- for (; exp <= 0x70; mant >>= 1, exp++)
- ;
- exp = 0;
- mant = mant >> 13;
- }
- else if (exp < 0x66) // Too small to represent -> Zero
- {
- exp = 0;
- mant = 0;
- }
- else
- {
- // Saves bits that will be shifted off for rounding
- roundBits = mant & 0x1FFFu;
- // convert exponent and mantissa to 16 bit format
- exp = exp - 0x70;
- mant = mant >> 13;
-
- // Essentially RTZ, but round up if off by only 1 lsb
- if (roundBits == 0x1FFFu)
- {
- mant++;
- // check for overflow
- if ((mant & 0xC00u) != 0)
- exp++;
- // make sure only the needed bits are used
- mant &= 0x3FF;
- }
- }
-
- uint32_t tmpVal = (sign << 15) | (exp << 10) | mant;
- return (uint16_t)tmpVal;
-}
-#endif
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Retrieve color from hot tile source which is always float.
-/// @param pDstPixel - Pointer to destination pixel.
-/// @param srcPixel - Pointer to source pixel (pre-swizzled according to dest).
-template<SWR_FORMAT DstFormat>
-static void ConvertPixelFromFloat(
- uint8_t* pDstPixel,
- const float srcPixel[4])
-{
- uint32_t outColor[4] = { 0 }; // typeless bits
-
- // Store component
- for (UINT comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
- {
- SWR_TYPE type = FormatTraits<DstFormat>::GetType(comp);
-
- float src = srcPixel[comp];
-
- switch (type)
- {
- case SWR_TYPE_UNORM:
- {
- // Force NaN to 0. IEEE standard, comparisons involving NaN always evaluate to false.
- src = (src != src) ? 0.0f : src;
-
- // Clamp [0, 1]
- src = std::max(src, 0.0f);
- src = std::min(src, 1.0f);
-
- // SRGB
- if (FormatTraits<DstFormat>::isSRGB && comp != 3)
- {
- src = (src <= 0.0031308f) ? (12.92f * src) : (1.055f * powf(src, (1.0f / 2.4f)) - 0.055f);
- }
-
- // Float scale to integer scale.
- UINT scale = (1 << FormatTraits<DstFormat>::GetBPC(comp)) - 1;
- src = (float)scale * src;
- src = roundf(src);
- outColor[comp] = (UINT)src; // Drop fractional part.
- break;
- }
- case SWR_TYPE_SNORM:
- {
- SWR_ASSERT(!FormatTraits<DstFormat>::isSRGB);
-
- // Force NaN to 0. IEEE standard, comparisons involving NaN always evaluate to false.
- src = (src != src) ? 0.0f : src;
-
- // Clamp [-1, 1]
- src = std::max(src, -1.0f);
- src = std::min(src, 1.0f);
-
- // Float scale to integer scale.
- UINT scale = (1 << (FormatTraits<DstFormat>::GetBPC(comp) - 1)) - 1;
- src = (float)scale * src;
-
- // Round
- src += (src >= 0) ? 0.5f : -0.5f;
-
- INT out = (INT)src;
-
- outColor[comp] = *(UINT*)&out;
-
- break;
- }
- case SWR_TYPE_UINT:
- {
- ///@note The *(UINT*)& is currently necessary as the hot tile appears to always be float.
- // However, the number in the hot tile should be unsigned integer. So doing this
- // to preserve bits intead of doing a float -> integer conversion.
- if (FormatTraits<DstFormat>::GetBPC(comp) == 32)
- {
- outColor[comp] = *(UINT*)&src;
- }
- else
- {
- outColor[comp] = *(UINT*)&src;
- UINT max = (1 << FormatTraits<DstFormat>::GetBPC(comp)) - 1; // 2^numBits - 1
-
- outColor[comp] = std::min(max, outColor[comp]);
- }
- break;
- }
- case SWR_TYPE_SINT:
- {
- if (FormatTraits<DstFormat>::GetBPC(comp) == 32)
- {
- outColor[comp] = *(UINT*)&src;
- }
- else
- {
- INT out = *(INT*)&src; // Hot tile format is SINT?
- INT max = (1 << (FormatTraits<DstFormat>::GetBPC(comp) - 1)) - 1;
- INT min = -1 - max;
-
- ///@note The output is unsigned integer (bag of bits) and so performing
- // the clamping here based on range of output component. Also, manually adding
- // the sign bit in the appropriate spot. Maybe a better way?
- out = std::max(out, min);
- out = std::min(out, max);
-
- outColor[comp] = *(UINT*)&out;
- }
- break;
- }
- case SWR_TYPE_FLOAT:
- {
- if (FormatTraits<DstFormat>::GetBPC(comp) == 16)
- {
- // Convert from 32-bit float to 16-bit float using _mm_cvtps_ph
- // @todo 16bit float instruction support is orthogonal to avx support. need to
- // add check for F16C support instead.
-#if KNOB_ARCH >= KNOB_ARCH_AVX2
- __m128 src128 = _mm_set1_ps(src);
- __m128i srci128 = _mm_cvtps_ph(src128, _MM_FROUND_TRUNC);
- UINT value = _mm_extract_epi16(srci128, 0);
-#else
- UINT value = Convert32To16Float(src);
-#endif
-
- outColor[comp] = value;
- }
- else if (FormatTraits<DstFormat>::GetBPC(comp) == 11)
- {
- outColor[comp] = Convert32ToSmallFloat<6>(src);
- }
- else if (FormatTraits<DstFormat>::GetBPC(comp) == 10)
- {
- outColor[comp] = Convert32ToSmallFloat<5>(src);
- }
- else
- {
- outColor[comp] = *(UINT*)&src;
- }
-
- break;
- }
- default:
- SWR_INVALID("Invalid type: %d", type);
- break;
- }
- }
-
- typename FormatTraits<DstFormat>::FormatT* pPixel = (typename FormatTraits<DstFormat>::FormatT*)pDstPixel;
-
- switch (FormatTraits<DstFormat>::numComps)
- {
- case 4:
- pPixel->a = outColor[3];
- case 3:
- pPixel->b = outColor[2];
- case 2:
- pPixel->g = outColor[1];
- case 1:
- pPixel->r = outColor[0];
- break;
- default:
- SWR_INVALID("Invalid # of comps: %d", FormatTraits<DstFormat>::numComps);
- }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Convert pixel in any format to float32
-/// @param pDstPixel - Pointer to destination pixel.
-/// @param srcPixel - Pointer to source pixel
-template<SWR_FORMAT SrcFormat>
-INLINE static void ConvertPixelToFloat(
- float dstPixel[4],
- const uint8_t* pSrc)
-{
- uint32_t srcColor[4]; // typeless bits
-
- // unpack src pixel
- typename FormatTraits<SrcFormat>::FormatT* pPixel = (typename FormatTraits<SrcFormat>::FormatT*)pSrc;
-
- // apply format defaults
- for (uint32_t comp = 0; comp < 4; ++comp)
- {
- uint32_t def = FormatTraits<SrcFormat>::GetDefault(comp);
- dstPixel[comp] = *(float*)&def;
- }
-
- // load format data
- switch (FormatTraits<SrcFormat>::numComps)
- {
- case 4:
- srcColor[3] = pPixel->a;
- case 3:
- srcColor[2] = pPixel->b;
- case 2:
- srcColor[1] = pPixel->g;
- case 1:
- srcColor[0] = pPixel->r;
- break;
- default:
- SWR_INVALID("Invalid # of comps: %d", FormatTraits<SrcFormat>::numComps);
- }
-
- // Convert components
- for (uint32_t comp = 0; comp < FormatTraits<SrcFormat>::numComps; ++comp)
- {
- SWR_TYPE type = FormatTraits<SrcFormat>::GetType(comp);
-
- uint32_t src = srcColor[comp];
-
- switch (type)
- {
- case SWR_TYPE_UNORM:
- {
- float dst;
- if (FormatTraits<SrcFormat>::isSRGB && comp != 3)
- {
- dst = *(float*)&srgb8Table[src];
- }
- else
- {
- // component sizes > 16 must use fp divide to maintain ulp requirements
- if (FormatTraits<SrcFormat>::GetBPC(comp) > 16)
- {
- dst = (float)src / (float)((1 << FormatTraits<SrcFormat>::GetBPC(comp)) - 1);
- }
- else
- {
- const float scale = (1.0f / (float)((1 << FormatTraits<SrcFormat>::GetBPC(comp)) - 1));
- dst = (float)src * scale;
- }
- }
- dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = dst;
- break;
- }
- case SWR_TYPE_SNORM:
- {
- SWR_ASSERT(!FormatTraits<SrcFormat>::isSRGB);
-
- float dst;
- if (src == 0x10)
- {
- dst = -1.0f;
- }
- else
- {
- switch (FormatTraits<SrcFormat>::GetBPC(comp))
- {
- case 8:
- dst = (float)((int8_t)src);
- break;
- case 16:
- dst = (float)((int16_t)src);
- break;
- case 32:
- dst = (float)((int32_t)src);
- break;
- default:
- assert(0 && "attempted to load from SNORM with unsupported bpc");
- dst = 0.0f;
- break;
- }
- dst = dst * (1.0f / ((1 << (FormatTraits<SrcFormat>::GetBPC(comp) - 1)) - 1));
- }
- dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = dst;
- break;
- }
- case SWR_TYPE_UINT:
- {
- uint32_t dst = (uint32_t)src;
- dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = *(float*)&dst;
- break;
- }
- case SWR_TYPE_SINT:
- {
- int dst;
- switch (FormatTraits<SrcFormat>::GetBPC(comp))
- {
- case 8:
- dst = (int8_t)src;
- break;
- case 16:
- dst = (int16_t)src;
- break;
- case 32:
- dst = (int32_t)src;
- break;
- default:
- assert(0 && "attempted to load from SINT with unsupported bpc");
- dst = 0;
- break;
- }
- dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = *(float*)&dst;
- break;
- }
- case SWR_TYPE_FLOAT:
- {
- float dst;
- if (FormatTraits<SrcFormat>::GetBPC(comp) == 16)
- {
-#if KNOB_ARCH >= KNOB_ARCH_AVX2
- // Convert from 16-bit float to 32-bit float using _mm_cvtph_ps
- // @todo 16bit float instruction support is orthogonal to avx support. need to
- // add check for F16C support instead.
- __m128i src128 = _mm_set1_epi32(src);
- __m128 res = _mm_cvtph_ps(src128);
- _mm_store_ss(&dst, res);
-#else
- dst = ConvertSmallFloatTo32(src);
-#endif
- }
- else if (FormatTraits<SrcFormat>::GetBPC(comp) == 11)
- {
- dst = ConvertSmallFloatTo32(src << 4);
- }
- else if (FormatTraits<SrcFormat>::GetBPC(comp) == 10)
- {
- dst = ConvertSmallFloatTo32(src << 5);
- }
- else
- {
- dst = *(float*)&src;
- }
-
- dstPixel[FormatTraits<SrcFormat>::swizzle(comp)] = *(float*)&dst;
- break;
- }
- default:
- SWR_INVALID("Invalid type: %d", type);
- break;
- }
- }
-}
-
-// non-templated version of conversion functions
-INLINE static void ConvertPixelFromFloat(
- SWR_FORMAT format,
- uint8_t* pDst,
- const float srcPixel[4])
-{
- switch (format)
- {
- case R32G32B32A32_FLOAT: ConvertPixelFromFloat<R32G32B32A32_FLOAT>(pDst, srcPixel); break;
- case R32G32B32A32_SINT: ConvertPixelFromFloat<R32G32B32A32_SINT>(pDst, srcPixel); break;
- case R32G32B32A32_UINT: ConvertPixelFromFloat<R32G32B32A32_UINT>(pDst, srcPixel); break;
- case R32G32B32X32_FLOAT: ConvertPixelFromFloat<R32G32B32X32_FLOAT>(pDst, srcPixel); break;
- case R32G32B32A32_SSCALED: ConvertPixelFromFloat<R32G32B32A32_SSCALED>(pDst, srcPixel); break;
- case R32G32B32A32_USCALED: ConvertPixelFromFloat<R32G32B32A32_USCALED>(pDst, srcPixel); break;
- case R32G32B32_FLOAT: ConvertPixelFromFloat<R32G32B32_FLOAT>(pDst, srcPixel); break;
- case R32G32B32_SINT: ConvertPixelFromFloat<R32G32B32_SINT>(pDst, srcPixel); break;
- case R32G32B32_UINT: ConvertPixelFromFloat<R32G32B32_UINT>(pDst, srcPixel); break;
- case R32G32B32_SSCALED: ConvertPixelFromFloat<R32G32B32_SSCALED>(pDst, srcPixel); break;
- case R32G32B32_USCALED: ConvertPixelFromFloat<R32G32B32_USCALED>(pDst, srcPixel); break;
- case R16G16B16A16_UNORM: ConvertPixelFromFloat<R16G16B16A16_UNORM>(pDst, srcPixel); break;
- case R16G16B16A16_SNORM: ConvertPixelFromFloat<R16G16B16A16_SNORM>(pDst, srcPixel); break;
- case R16G16B16A16_SINT: ConvertPixelFromFloat<R16G16B16A16_SINT>(pDst, srcPixel); break;
- case R16G16B16A16_UINT: ConvertPixelFromFloat<R16G16B16A16_UINT>(pDst, srcPixel); break;
- case R16G16B16A16_FLOAT: ConvertPixelFromFloat<R16G16B16A16_FLOAT>(pDst, srcPixel); break;
- case R32G32_FLOAT: ConvertPixelFromFloat<R32G32_FLOAT>(pDst, srcPixel); break;
- case R32G32_SINT: ConvertPixelFromFloat<R32G32_SINT>(pDst, srcPixel); break;
- case R32G32_UINT: ConvertPixelFromFloat<R32G32_UINT>(pDst, srcPixel); break;
- case R32_FLOAT_X8X24_TYPELESS: ConvertPixelFromFloat<R32_FLOAT_X8X24_TYPELESS>(pDst, srcPixel); break;
- case X32_TYPELESS_G8X24_UINT: ConvertPixelFromFloat<X32_TYPELESS_G8X24_UINT>(pDst, srcPixel); break;
- case L32A32_FLOAT: ConvertPixelFromFloat<L32A32_FLOAT>(pDst, srcPixel); break;
- case R16G16B16X16_UNORM: ConvertPixelFromFloat<R16G16B16X16_UNORM>(pDst, srcPixel); break;
- case R16G16B16X16_FLOAT: ConvertPixelFromFloat<R16G16B16X16_FLOAT>(pDst, srcPixel); break;
- case L32X32_FLOAT: ConvertPixelFromFloat<L32X32_FLOAT>(pDst, srcPixel); break;
- case I32X32_FLOAT: ConvertPixelFromFloat<I32X32_FLOAT>(pDst, srcPixel); break;
- case R16G16B16A16_SSCALED: ConvertPixelFromFloat<R16G16B16A16_SSCALED>(pDst, srcPixel); break;
- case R16G16B16A16_USCALED: ConvertPixelFromFloat<R16G16B16A16_USCALED>(pDst, srcPixel); break;
- case R32G32_SSCALED: ConvertPixelFromFloat<R32G32_SSCALED>(pDst, srcPixel); break;
- case R32G32_USCALED: ConvertPixelFromFloat<R32G32_USCALED>(pDst, srcPixel); break;
- case B8G8R8A8_UNORM: ConvertPixelFromFloat<B8G8R8A8_UNORM>(pDst, srcPixel); break;
- case B8G8R8A8_UNORM_SRGB: ConvertPixelFromFloat<B8G8R8A8_UNORM_SRGB>(pDst, srcPixel); break;
- case R10G10B10A2_UNORM: ConvertPixelFromFloat<R10G10B10A2_UNORM>(pDst, srcPixel); break;
- case R10G10B10A2_UNORM_SRGB: ConvertPixelFromFloat<R10G10B10A2_UNORM_SRGB>(pDst, srcPixel); break;
- case R10G10B10A2_UINT: ConvertPixelFromFloat<R10G10B10A2_UINT>(pDst, srcPixel); break;
- case R8G8B8A8_UNORM: ConvertPixelFromFloat<R8G8B8A8_UNORM>(pDst, srcPixel); break;
- case R8G8B8A8_UNORM_SRGB: ConvertPixelFromFloat<R8G8B8A8_UNORM_SRGB>(pDst, srcPixel); break;
- case R8G8B8A8_SNORM: ConvertPixelFromFloat<R8G8B8A8_SNORM>(pDst, srcPixel); break;
- case R8G8B8A8_SINT: ConvertPixelFromFloat<R8G8B8A8_SINT>(pDst, srcPixel); break;
- case R8G8B8A8_UINT: ConvertPixelFromFloat<R8G8B8A8_UINT>(pDst, srcPixel); break;
- case R16G16_UNORM: ConvertPixelFromFloat<R16G16_UNORM>(pDst, srcPixel); break;
- case R16G16_SNORM: ConvertPixelFromFloat<R16G16_SNORM>(pDst, srcPixel); break;
- case R16G16_SINT: ConvertPixelFromFloat<R16G16_SINT>(pDst, srcPixel); break;
- case R16G16_UINT: ConvertPixelFromFloat<R16G16_UINT>(pDst, srcPixel); break;
- case R16G16_FLOAT: ConvertPixelFromFloat<R16G16_FLOAT>(pDst, srcPixel); break;
- case B10G10R10A2_UNORM: ConvertPixelFromFloat<B10G10R10A2_UNORM>(pDst, srcPixel); break;
- case B10G10R10A2_UNORM_SRGB: ConvertPixelFromFloat<B10G10R10A2_UNORM_SRGB>(pDst, srcPixel); break;
- case R11G11B10_FLOAT: ConvertPixelFromFloat<R11G11B10_FLOAT>(pDst, srcPixel); break;
- case R10G10B10_FLOAT_A2_UNORM: ConvertPixelFromFloat<R10G10B10_FLOAT_A2_UNORM>(pDst, srcPixel); break;
- case R32_SINT: ConvertPixelFromFloat<R32_SINT>(pDst, srcPixel); break;
- case R32_UINT: ConvertPixelFromFloat<R32_UINT>(pDst, srcPixel); break;
- case R32_FLOAT: ConvertPixelFromFloat<R32_FLOAT>(pDst, srcPixel); break;
- case R24_UNORM_X8_TYPELESS: ConvertPixelFromFloat<R24_UNORM_X8_TYPELESS>(pDst, srcPixel); break;
- case X24_TYPELESS_G8_UINT: ConvertPixelFromFloat<X24_TYPELESS_G8_UINT>(pDst, srcPixel); break;
- case L32_UNORM: ConvertPixelFromFloat<L32_UNORM>(pDst, srcPixel); break;
- case L16A16_UNORM: ConvertPixelFromFloat<L16A16_UNORM>(pDst, srcPixel); break;
- case I24X8_UNORM: ConvertPixelFromFloat<I24X8_UNORM>(pDst, srcPixel); break;
- case L24X8_UNORM: ConvertPixelFromFloat<L24X8_UNORM>(pDst, srcPixel); break;
- case I32_FLOAT: ConvertPixelFromFloat<I32_FLOAT>(pDst, srcPixel); break;
- case L32_FLOAT: ConvertPixelFromFloat<L32_FLOAT>(pDst, srcPixel); break;
- case A32_FLOAT: ConvertPixelFromFloat<A32_FLOAT>(pDst, srcPixel); break;
- case B8G8R8X8_UNORM: ConvertPixelFromFloat<B8G8R8X8_UNORM>(pDst, srcPixel); break;
- case B8G8R8X8_UNORM_SRGB: ConvertPixelFromFloat<B8G8R8X8_UNORM_SRGB>(pDst, srcPixel); break;
- case R8G8B8X8_UNORM: ConvertPixelFromFloat<R8G8B8X8_UNORM>(pDst, srcPixel); break;
- case R8G8B8X8_UNORM_SRGB: ConvertPixelFromFloat<R8G8B8X8_UNORM_SRGB>(pDst, srcPixel); break;
- case R9G9B9E5_SHAREDEXP: ConvertPixelFromFloat<R9G9B9E5_SHAREDEXP>(pDst, srcPixel); break;
- case B10G10R10X2_UNORM: ConvertPixelFromFloat<B10G10R10X2_UNORM>(pDst, srcPixel); break;
- case L16A16_FLOAT: ConvertPixelFromFloat<L16A16_FLOAT>(pDst, srcPixel); break;
- case R10G10B10X2_USCALED: ConvertPixelFromFloat<R10G10B10X2_USCALED>(pDst, srcPixel); break;
- case R8G8B8A8_SSCALED: ConvertPixelFromFloat<R8G8B8A8_SSCALED>(pDst, srcPixel); break;
- case R8G8B8A8_USCALED: ConvertPixelFromFloat<R8G8B8A8_USCALED>(pDst, srcPixel); break;
- case R16G16_SSCALED: ConvertPixelFromFloat<R16G16_SSCALED>(pDst, srcPixel); break;
- case R16G16_USCALED: ConvertPixelFromFloat<R16G16_USCALED>(pDst, srcPixel); break;
- case R32_SSCALED: ConvertPixelFromFloat<R32_SSCALED>(pDst, srcPixel); break;
- case R32_USCALED: ConvertPixelFromFloat<R32_USCALED>(pDst, srcPixel); break;
- case B5G6R5_UNORM: ConvertPixelFromFloat<B5G6R5_UNORM>(pDst, srcPixel); break;
- case B5G6R5_UNORM_SRGB: ConvertPixelFromFloat<B5G6R5_UNORM_SRGB>(pDst, srcPixel); break;
- case B5G5R5A1_UNORM: ConvertPixelFromFloat<B5G5R5A1_UNORM>(pDst, srcPixel); break;
- case B5G5R5A1_UNORM_SRGB: ConvertPixelFromFloat<B5G5R5A1_UNORM_SRGB>(pDst, srcPixel); break;
- case B4G4R4A4_UNORM: ConvertPixelFromFloat<B4G4R4A4_UNORM>(pDst, srcPixel); break;
- case B4G4R4A4_UNORM_SRGB: ConvertPixelFromFloat<B4G4R4A4_UNORM_SRGB>(pDst, srcPixel); break;
- case R8G8_UNORM: ConvertPixelFromFloat<R8G8_UNORM>(pDst, srcPixel); break;
- case R8G8_SNORM: ConvertPixelFromFloat<R8G8_SNORM>(pDst, srcPixel); break;
- case R8G8_SINT: ConvertPixelFromFloat<R8G8_SINT>(pDst, srcPixel); break;
- case R8G8_UINT: ConvertPixelFromFloat<R8G8_UINT>(pDst, srcPixel); break;
- case R16_UNORM: ConvertPixelFromFloat<R16_UNORM>(pDst, srcPixel); break;
- case R16_SNORM: ConvertPixelFromFloat<R16_SNORM>(pDst, srcPixel); break;
- case R16_SINT: ConvertPixelFromFloat<R16_SINT>(pDst, srcPixel); break;
- case R16_UINT: ConvertPixelFromFloat<R16_UINT>(pDst, srcPixel); break;
- case R16_FLOAT: ConvertPixelFromFloat<R16_FLOAT>(pDst, srcPixel); break;
- case I16_UNORM: ConvertPixelFromFloat<I16_UNORM>(pDst, srcPixel); break;
- case L16_UNORM: ConvertPixelFromFloat<L16_UNORM>(pDst, srcPixel); break;
- case A16_UNORM: ConvertPixelFromFloat<A16_UNORM>(pDst, srcPixel); break;
- case L8A8_UNORM: ConvertPixelFromFloat<L8A8_UNORM>(pDst, srcPixel); break;
- case I16_FLOAT: ConvertPixelFromFloat<I16_FLOAT>(pDst, srcPixel); break;
- case L16_FLOAT: ConvertPixelFromFloat<L16_FLOAT>(pDst, srcPixel); break;
- case A16_FLOAT: ConvertPixelFromFloat<A16_FLOAT>(pDst, srcPixel); break;
- case L8A8_UNORM_SRGB: ConvertPixelFromFloat<L8A8_UNORM_SRGB>(pDst, srcPixel); break;
- case B5G5R5X1_UNORM: ConvertPixelFromFloat<B5G5R5X1_UNORM>(pDst, srcPixel); break;
- case B5G5R5X1_UNORM_SRGB: ConvertPixelFromFloat<B5G5R5X1_UNORM_SRGB>(pDst, srcPixel); break;
- case R8G8_SSCALED: ConvertPixelFromFloat<R8G8_SSCALED>(pDst, srcPixel); break;
- case R8G8_USCALED: ConvertPixelFromFloat<R8G8_USCALED>(pDst, srcPixel); break;
- case R16_SSCALED: ConvertPixelFromFloat<R16_SSCALED>(pDst, srcPixel); break;
- case R16_USCALED: ConvertPixelFromFloat<R16_USCALED>(pDst, srcPixel); break;
- case A1B5G5R5_UNORM: ConvertPixelFromFloat<A1B5G5R5_UNORM>(pDst, srcPixel); break;
- case A4B4G4R4_UNORM: ConvertPixelFromFloat<A4B4G4R4_UNORM>(pDst, srcPixel); break;
- case L8A8_UINT: ConvertPixelFromFloat<L8A8_UINT>(pDst, srcPixel); break;
- case L8A8_SINT: ConvertPixelFromFloat<L8A8_SINT>(pDst, srcPixel); break;
- case R8_UNORM: ConvertPixelFromFloat<R8_UNORM>(pDst, srcPixel); break;
- case R8_SNORM: ConvertPixelFromFloat<R8_SNORM>(pDst, srcPixel); break;
- case R8_SINT: ConvertPixelFromFloat<R8_SINT>(pDst, srcPixel); break;
- case R8_UINT: ConvertPixelFromFloat<R8_UINT>(pDst, srcPixel); break;
- case A8_UNORM: ConvertPixelFromFloat<A8_UNORM>(pDst, srcPixel); break;
- case I8_UNORM: ConvertPixelFromFloat<I8_UNORM>(pDst, srcPixel); break;
- case L8_UNORM: ConvertPixelFromFloat<L8_UNORM>(pDst, srcPixel); break;
- case R8_SSCALED: ConvertPixelFromFloat<R8_SSCALED>(pDst, srcPixel); break;
- case R8_USCALED: ConvertPixelFromFloat<R8_USCALED>(pDst, srcPixel); break;
- case L8_UNORM_SRGB: ConvertPixelFromFloat<L8_UNORM_SRGB>(pDst, srcPixel); break;
- case L8_UINT: ConvertPixelFromFloat<L8_UINT>(pDst, srcPixel); break;
- case L8_SINT: ConvertPixelFromFloat<L8_SINT>(pDst, srcPixel); break;
- case I8_UINT: ConvertPixelFromFloat<I8_UINT>(pDst, srcPixel); break;
- case I8_SINT: ConvertPixelFromFloat<I8_SINT>(pDst, srcPixel); break;
- case YCRCB_SWAPUVY: ConvertPixelFromFloat<YCRCB_SWAPUVY>(pDst, srcPixel); break;
- case BC1_UNORM: ConvertPixelFromFloat<BC1_UNORM>(pDst, srcPixel); break;
- case BC2_UNORM: ConvertPixelFromFloat<BC2_UNORM>(pDst, srcPixel); break;
- case BC3_UNORM: ConvertPixelFromFloat<BC3_UNORM>(pDst, srcPixel); break;
- case BC4_UNORM: ConvertPixelFromFloat<BC4_UNORM>(pDst, srcPixel); break;
- case BC5_UNORM: ConvertPixelFromFloat<BC5_UNORM>(pDst, srcPixel); break;
- case BC1_UNORM_SRGB: ConvertPixelFromFloat<BC1_UNORM_SRGB>(pDst, srcPixel); break;
- case BC2_UNORM_SRGB: ConvertPixelFromFloat<BC2_UNORM_SRGB>(pDst, srcPixel); break;
- case BC3_UNORM_SRGB: ConvertPixelFromFloat<BC3_UNORM_SRGB>(pDst, srcPixel); break;
- case YCRCB_SWAPUV: ConvertPixelFromFloat<YCRCB_SWAPUV>(pDst, srcPixel); break;
- case R8G8B8_UNORM: ConvertPixelFromFloat<R8G8B8_UNORM>(pDst, srcPixel); break;
- case R8G8B8_SNORM: ConvertPixelFromFloat<R8G8B8_SNORM>(pDst, srcPixel); break;
- case R8G8B8_SSCALED: ConvertPixelFromFloat<R8G8B8_SSCALED>(pDst, srcPixel); break;
- case R8G8B8_USCALED: ConvertPixelFromFloat<R8G8B8_USCALED>(pDst, srcPixel); break;
- case BC4_SNORM: ConvertPixelFromFloat<BC4_SNORM>(pDst, srcPixel); break;
- case BC5_SNORM: ConvertPixelFromFloat<BC5_SNORM>(pDst, srcPixel); break;
- case R16G16B16_FLOAT: ConvertPixelFromFloat<R16G16B16_FLOAT>(pDst, srcPixel); break;
- case R16G16B16_UNORM: ConvertPixelFromFloat<R16G16B16_UNORM>(pDst, srcPixel); break;
- case R16G16B16_SNORM: ConvertPixelFromFloat<R16G16B16_SNORM>(pDst, srcPixel); break;
- case R16G16B16_SSCALED: ConvertPixelFromFloat<R16G16B16_SSCALED>(pDst, srcPixel); break;
- case R16G16B16_USCALED: ConvertPixelFromFloat<R16G16B16_USCALED>(pDst, srcPixel); break;
- case BC6H_SF16: ConvertPixelFromFloat<BC6H_SF16>(pDst, srcPixel); break;
- case BC7_UNORM: ConvertPixelFromFloat<BC7_UNORM>(pDst, srcPixel); break;
- case BC7_UNORM_SRGB: ConvertPixelFromFloat<BC7_UNORM_SRGB>(pDst, srcPixel); break;
- case BC6H_UF16: ConvertPixelFromFloat<BC6H_UF16>(pDst, srcPixel); break;
- case R8G8B8_UNORM_SRGB: ConvertPixelFromFloat<R8G8B8_UNORM_SRGB>(pDst, srcPixel); break;
- case R16G16B16_UINT: ConvertPixelFromFloat<R16G16B16_UINT>(pDst, srcPixel); break;
- case R16G16B16_SINT: ConvertPixelFromFloat<R16G16B16_SINT>(pDst, srcPixel); break;
- case R10G10B10A2_SNORM: ConvertPixelFromFloat<R10G10B10A2_SNORM>(pDst, srcPixel); break;
- case R10G10B10A2_USCALED: ConvertPixelFromFloat<R10G10B10A2_USCALED>(pDst, srcPixel); break;
- case R10G10B10A2_SSCALED: ConvertPixelFromFloat<R10G10B10A2_SSCALED>(pDst, srcPixel); break;
- case R10G10B10A2_SINT: ConvertPixelFromFloat<R10G10B10A2_SINT>(pDst, srcPixel); break;
- case B10G10R10A2_SNORM: ConvertPixelFromFloat<B10G10R10A2_SNORM>(pDst, srcPixel); break;
- case B10G10R10A2_USCALED: ConvertPixelFromFloat<B10G10R10A2_USCALED>(pDst, srcPixel); break;
- case B10G10R10A2_SSCALED: ConvertPixelFromFloat<B10G10R10A2_SSCALED>(pDst, srcPixel); break;
- case B10G10R10A2_UINT: ConvertPixelFromFloat<B10G10R10A2_UINT>(pDst, srcPixel); break;
- case B10G10R10A2_SINT: ConvertPixelFromFloat<B10G10R10A2_SINT>(pDst, srcPixel); break;
- case R8G8B8_UINT: ConvertPixelFromFloat<R8G8B8_UINT>(pDst, srcPixel); break;
- case R8G8B8_SINT: ConvertPixelFromFloat<R8G8B8_SINT>(pDst, srcPixel); break;
- case RAW: ConvertPixelFromFloat<RAW>(pDst, srcPixel); break;
- default:
- SWR_INVALID("Invalid format: %d", format);
- break;
- }
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp b/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp
deleted file mode 100644
index 3a19bbac70e..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/InitMemory.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file InitMemory.cpp
-*
-* @brief Provide access to tiles table initialization functions
-*
-******************************************************************************/
-
-#include "memory/InitMemory.h"
-#include "memory/LoadTile.h"
-#include "memory/StoreTile.h"
-#include "InitMemory.h"
-
-void InitSimLoadTilesTable();
-void InitSimStoreTilesTable();
-void InitSimClearTilesTable();
-
-void InitTilesTable()
-{
- InitSimLoadTilesTable();
- InitSimStoreTilesTable();
- InitSimClearTilesTable();
-}
-
-
-void SwrGetTileIterface(SWR_TILE_INTERFACE &out_funcs)
-{
- out_funcs.pfnSwrLoadHotTile = SwrLoadHotTile;
- out_funcs.pfnSwrStoreHotTileToSurface = SwrStoreHotTileToSurface;
-} \ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h b/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h
deleted file mode 100644
index a3ed7b3cbdb..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/InitMemory.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file InitMemory.h
-*
-* @brief Provide access to tiles table initialization functions
-*
-******************************************************************************/
-
-#pragma once
-
-#include "common/os.h"
-#include "memory/SurfaceState.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Loads a full hottile from a render surface
-/// @param hPrivateContext - Handle to private DC
-/// @param dstFormat - Format for hot tile.
-/// @param renderTargetIndex - Index to src render target
-/// @param x, y - Coordinates to raster tile.
-/// @param pDstHotTile - Pointer to Hot Tile
-SWR_FUNC(void,
- SwrLoadHotTile,
- HANDLE hWorkerPrivateData,
- const SWR_SURFACE_STATE* pSrcSurface,
- BucketManager* pBucketManager,
- SWR_FORMAT dstFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x,
- uint32_t y,
- uint32_t renderTargetArrayIndex,
- uint8_t* pDstHotTile);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Deswizzles and stores a full hottile to a render surface
-/// @param hPrivateContext - Handle to private DC
-/// @param srcFormat - Format for hot tile.
-/// @param renderTargetIndex - Index to destination render target
-/// @param x, y - Coordinates to raster tile.
-/// @param pSrcHotTile - Pointer to Hot Tile
-SWR_FUNC(void,
- SwrStoreHotTileToSurface,
- HANDLE hWorkerPrivateData,
- SWR_SURFACE_STATE* pDstSurface,
- BucketManager* pBucketManager,
- SWR_FORMAT srcFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x,
- uint32_t y,
- uint32_t renderTargetArrayIndex,
- uint8_t* pSrcHotTile);
-
-struct SWR_TILE_INTERFACE {
- PFNSwrLoadHotTile pfnSwrLoadHotTile;
- PFNSwrStoreHotTileToSurface pfnSwrStoreHotTileToSurface;
-};
-
-extern "C"
-{
- SWR_VISIBLE void SWR_API InitTilesTable();
-
- typedef void(SWR_API* PFNSwrGetTileInterface)(SWR_TILE_INTERFACE& out_funcs);
- SWR_VISIBLE void SWR_API SwrGetTileIterface(SWR_TILE_INTERFACE &out_funcs);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/LoadTile.cpp b/src/gallium/drivers/swr/rasterizer/memory/LoadTile.cpp
deleted file mode 100644
index a26d45d130f..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/LoadTile.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file LoadTile.cpp
-*
-* @brief Functionality for Load
-*
-******************************************************************************/
-#include "LoadTile.h"
-
-// on demand buckets for load tiles
-static std::vector<int> sBuckets(NUM_SWR_FORMATS, -1);
-static std::mutex sBucketMutex;
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Loads a full hottile from a render surface
-/// @param hPrivateContext - Handle to private DC
-/// @param dstFormat - Format for hot tile.
-/// @param renderTargetIndex - Index to src render target
-/// @param x, y - Coordinates to raster tile.
-/// @param pDstHotTile - Pointer to Hot Tile
-void SwrLoadHotTile(
- HANDLE hWorkerPrivateData,
- const SWR_SURFACE_STATE *pSrcSurface,
- BucketManager* pBucketMgr,
- SWR_FORMAT dstFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex,
- uint8_t *pDstHotTile)
-{
- PFN_LOAD_TILES pfnLoadTiles = NULL;
-
- // don't need to load null surfaces
- if (pSrcSurface->type == SURFACE_NULL)
- {
- return;
- }
-
- // force 0 if requested renderTargetArrayIndex is OOB
- if (renderTargetArrayIndex >= pSrcSurface->depth)
- {
- renderTargetArrayIndex = 0;
- }
-
- if (renderTargetIndex < SWR_ATTACHMENT_DEPTH)
- {
- switch (pSrcSurface->tileMode)
- {
- case SWR_TILE_NONE:
- pfnLoadTiles = sLoadTilesColorTable_SWR_TILE_NONE[pSrcSurface->format];
- break;
- case SWR_TILE_MODE_YMAJOR:
- pfnLoadTiles = sLoadTilesColorTable_SWR_TILE_MODE_YMAJOR[pSrcSurface->format];
- break;
- case SWR_TILE_MODE_XMAJOR:
- pfnLoadTiles = sLoadTilesColorTable_SWR_TILE_MODE_XMAJOR[pSrcSurface->format];
- break;
- case SWR_TILE_MODE_WMAJOR:
- SWR_ASSERT(pSrcSurface->format == R8_UINT);
- pfnLoadTiles = LoadMacroTile<TilingTraits<SWR_TILE_MODE_WMAJOR, 8>, R8_UINT, R8_UINT>::Load;
- break;
- default:
- SWR_INVALID("Unsupported tiling mode");
- break;
- }
- }
- else if (renderTargetIndex == SWR_ATTACHMENT_DEPTH)
- {
- // Currently depth can map to linear and tile-y.
- switch (pSrcSurface->tileMode)
- {
- case SWR_TILE_NONE:
- pfnLoadTiles = sLoadTilesDepthTable_SWR_TILE_NONE[pSrcSurface->format];
- break;
- case SWR_TILE_MODE_YMAJOR:
- pfnLoadTiles = sLoadTilesDepthTable_SWR_TILE_MODE_YMAJOR[pSrcSurface->format];
- break;
- default:
- SWR_INVALID("Unsupported tiling mode");
- break;
- }
- }
- else
- {
- SWR_ASSERT(renderTargetIndex == SWR_ATTACHMENT_STENCIL);
- SWR_ASSERT(pSrcSurface->format == R8_UINT);
- switch (pSrcSurface->tileMode)
- {
- case SWR_TILE_NONE:
- pfnLoadTiles = LoadMacroTile<TilingTraits<SWR_TILE_NONE, 8>, R8_UINT, R8_UINT>::Load;
- break;
- case SWR_TILE_MODE_WMAJOR:
- pfnLoadTiles = LoadMacroTile<TilingTraits<SWR_TILE_MODE_WMAJOR, 8>, R8_UINT, R8_UINT>::Load;
- break;
- default:
- SWR_INVALID("Unsupported tiling mode");
- break;
- }
- }
-
- if (pfnLoadTiles == nullptr)
- {
- SWR_INVALID("Unsupported format for load tile");
- return;
- }
-
- // Load a macro tile.
-#ifdef KNOB_ENABLE_RDTSC
- if (sBuckets[pSrcSurface->format] == -1)
- {
- // guard sBuckets update since storetiles is called by multiple threads
- sBucketMutex.lock();
- if (sBuckets[pSrcSurface->format] == -1)
- {
- const SWR_FORMAT_INFO& info = GetFormatInfo(pSrcSurface->format);
- BUCKET_DESC desc{ info.name, "", false, 0xffffffff };
- sBuckets[pSrcSurface->format] = pBucketMgr->RegisterBucket(desc);
- }
- sBucketMutex.unlock();
- }
-#endif
-
-#ifdef KNOB_ENABLE_RDTSC
- pBucketMgr->StartBucket(sBuckets[pSrcSurface->format]);
-#endif
- pfnLoadTiles(pSrcSurface, pDstHotTile, x, y, renderTargetArrayIndex);
-#ifdef KNOB_ENABLE_RDTSC
- pBucketMgr->StopBucket(sBuckets[pSrcSurface->format]);
-#endif
-}
-
-
-void InitSimLoadTilesTable()
-{
- InitLoadTilesTable_Linear();
- InitLoadTilesTable_XMajor();
- InitLoadTilesTable_YMajor();
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/LoadTile.h b/src/gallium/drivers/swr/rasterizer/memory/LoadTile.h
deleted file mode 100644
index f74c3fdf4b0..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/LoadTile.h
+++ /dev/null
@@ -1,354 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file LoadTile.h
-*
-* @brief Functionality for Load
-*
-******************************************************************************/
-#include "common/os.h"
-#include "common/formats.h"
-#include "core/context.h"
-#include "core/rdtsc_core.h"
-#include "memory/TilingFunctions.h"
-#include "memory/tilingtraits.h"
-#include "memory/Convert.h"
-
-typedef void(*PFN_LOAD_TILES)(const SWR_SURFACE_STATE*, uint8_t*, uint32_t, uint32_t, uint32_t);
-typedef void(*PFN_LOAD_RASTER_TILES)(const SWR_SURFACE_STATE*, uint8_t*, uint32_t, uint32_t, uint32_t, uint32_t);
-
-//////////////////////////////////////////////////////////////////////////
-/// Load Raster Tile Function Tables.
-//////////////////////////////////////////////////////////////////////////
-extern PFN_LOAD_TILES sLoadTilesColorTable_SWR_TILE_NONE[NUM_SWR_FORMATS];
-extern PFN_LOAD_TILES sLoadTilesDepthTable_SWR_TILE_NONE[NUM_SWR_FORMATS];
-
-extern PFN_LOAD_TILES sLoadTilesColorTable_SWR_TILE_MODE_YMAJOR[NUM_SWR_FORMATS];
-extern PFN_LOAD_TILES sLoadTilesColorTable_SWR_TILE_MODE_XMAJOR[NUM_SWR_FORMATS];
-
-extern PFN_LOAD_TILES sLoadTilesDepthTable_SWR_TILE_MODE_YMAJOR[NUM_SWR_FORMATS];
-
-void InitLoadTilesTable_Linear();
-void InitLoadTilesTable_XMajor();
-void InitLoadTilesTable_YMajor();
-
-//////////////////////////////////////////////////////////////////////////
-/// LoadRasterTile
-//////////////////////////////////////////////////////////////////////////
-template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct LoadRasterTile
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Retrieve color from hot tile source which is always float.
- /// @param pSrc - Pointer to raster tile.
- /// @param x, y - Coordinates to raster tile.
- /// @param output - output color
- INLINE static void SetSwizzledDstColor(
- const float srcColor[4],
- uint32_t x, uint32_t y,
- uint8_t* pDst)
- {
- typedef SimdTile_16<DstFormat, SrcFormat> SimdT;
-
- SimdT* pDstSimdTiles = (SimdT*)pDst;
-
- // Compute which simd tile we're accessing within 8x8 tile.
- // i.e. Compute linear simd tile coordinate given (x, y) in pixel coordinates.
- uint32_t simdIndex = (y / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM) + (x / SIMD16_TILE_X_DIM);
-
- SimdT* pSimdTile = &pDstSimdTiles[simdIndex];
-
- uint32_t simdOffset = (y % SIMD16_TILE_Y_DIM) * SIMD16_TILE_X_DIM + (x % SIMD16_TILE_X_DIM);
-
- pSimdTile->SetSwizzledColor(simdOffset, srcColor);
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Loads an 8x8 raster tile from the src surface.
- /// @param pSrcSurface - Src surface state
- /// @param pDst - Destination hot tile pointer
- /// @param x, y - Coordinates to raster tile.
- INLINE static void Load(
- const SWR_SURFACE_STATE* pSrcSurface,
- uint8_t* pDst,
- uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex) // (x, y) pixel coordinate to start of raster tile.
- {
- uint32_t lodWidth = (pSrcSurface->width == 1) ? 1 : pSrcSurface->width >> pSrcSurface->lod;
- uint32_t lodHeight = (pSrcSurface->height == 1) ? 1 : pSrcSurface->height >> pSrcSurface->lod;
-
- // For each raster tile pixel (rx, ry)
- for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)
- {
- for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)
- {
- if (((x + rx) < lodWidth) &&
- ((y + ry) < lodHeight))
- {
- uint8_t* pSrc = (uint8_t*)ComputeSurfaceAddress<false, true>(x + rx, y + ry, pSrcSurface->arrayIndex + renderTargetArrayIndex,
- pSrcSurface->arrayIndex + renderTargetArrayIndex, sampleNum,
- pSrcSurface->lod, pSrcSurface);
-
- float srcColor[4];
- ConvertPixelToFloat<SrcFormat>(srcColor, pSrc);
-
- // store pixel to hottile
- SetSwizzledDstColor(srcColor, rx, ry, pDst);
- }
- }
- }
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// LoadMacroTile - Loads a macro tile which consists of raster tiles.
-//////////////////////////////////////////////////////////////////////////
-template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct LoadMacroTile
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Load a macrotile to the destination surface.
- /// @param pSrc - Pointer to macro tile.
- /// @param pDstSurface - Destination surface state
- /// @param x, y - Coordinates to macro tile
- static void Load(
- const SWR_SURFACE_STATE* pSrcSurface,
- uint8_t *pDstHotTile,
- uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex)
- {
- PFN_LOAD_RASTER_TILES loadRasterTileFn;
- loadRasterTileFn = LoadRasterTile<TTraits, SrcFormat, DstFormat>::Load;
-
- // Load each raster tile from the hot tile to the destination surface.
- for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
- {
- for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
- {
- for (uint32_t sampleNum = 0; sampleNum < pSrcSurface->numSamples; sampleNum++)
- {
- loadRasterTileFn(pSrcSurface, pDstHotTile, (x + col), (y + row), sampleNum, renderTargetArrayIndex);
- pDstHotTile += KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<DstFormat>::bpp / 8);
- }
- }
- }
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// InitLoadTileColorTable - Helper function for setting up the tables.
-template<SWR_TILE_MODE TTileMode>
-static INLINE void InitLoadTileColorTable(PFN_LOAD_TILES (&table)[NUM_SWR_FORMATS])
-{
- memset(table, 0, sizeof(table));
-
- table[R32G32B32A32_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[R32G32B32A32_SINT] = LoadMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_SINT, R32G32B32A32_FLOAT>::Load;
- table[R32G32B32A32_UINT] = LoadMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_UINT, R32G32B32A32_FLOAT>::Load;
- table[R32G32B32X32_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 128>, R32G32B32X32_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[R32G32B32A32_SSCALED] = LoadMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_SSCALED, R32G32B32A32_FLOAT>::Load;
- table[R32G32B32A32_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_USCALED, R32G32B32A32_FLOAT>::Load;
- table[R32G32B32_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 96>, R32G32B32_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[R32G32B32_SINT] = LoadMacroTile<TilingTraits<TTileMode, 96>, R32G32B32_SINT, R32G32B32A32_FLOAT>::Load;
- table[R32G32B32_UINT] = LoadMacroTile<TilingTraits<TTileMode, 96>, R32G32B32_UINT, R32G32B32A32_FLOAT>::Load;
- table[R32G32B32_SSCALED] = LoadMacroTile<TilingTraits<TTileMode, 96>, R32G32B32_SSCALED, R32G32B32A32_FLOAT>::Load;
- table[R32G32B32_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 96>, R32G32B32_USCALED, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16A16_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16A16_UNORM, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16A16_SNORM] = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16A16_SNORM, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16A16_SINT] = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16A16_SINT, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16A16_UINT] = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16A16_UINT, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16A16_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16A16_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[R32G32_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 64>, R32G32_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[R32G32_SINT] = LoadMacroTile<TilingTraits<TTileMode, 64>, R32G32_SINT, R32G32B32A32_FLOAT>::Load;
- table[R32G32_UINT] = LoadMacroTile<TilingTraits<TTileMode, 64>, R32G32_UINT, R32G32B32A32_FLOAT>::Load;
- table[R32_FLOAT_X8X24_TYPELESS] = LoadMacroTile<TilingTraits<TTileMode, 64>, R32_FLOAT_X8X24_TYPELESS, R32G32B32A32_FLOAT>::Load;
- table[X32_TYPELESS_G8X24_UINT] = LoadMacroTile<TilingTraits<TTileMode, 64>, X32_TYPELESS_G8X24_UINT, R32G32B32A32_FLOAT>::Load;
- table[L32A32_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 64>, L32A32_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16X16_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16X16_UNORM, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16X16_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16X16_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[L32X32_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 64>, L32X32_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[I32X32_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 64>, I32X32_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16A16_SSCALED] = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16A16_SSCALED, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16A16_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 64>, R16G16B16A16_USCALED, R32G32B32A32_FLOAT>::Load;
- table[R32G32_SSCALED] = LoadMacroTile<TilingTraits<TTileMode, 64>, R32G32_SSCALED, R32G32B32A32_FLOAT>::Load;
- table[R32G32_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 64>, R32G32_USCALED, R32G32B32A32_FLOAT>::Load;
- table[B8G8R8A8_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, B8G8R8A8_UNORM, R32G32B32A32_FLOAT>::Load;
- table[B8G8R8A8_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 32>, B8G8R8A8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[R10G10B10A2_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10A2_UNORM, R32G32B32A32_FLOAT>::Load;
- table[R10G10B10A2_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10A2_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[R10G10B10A2_UINT] = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10A2_UINT, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8A8_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8A8_UNORM, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8A8_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8A8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8A8_SNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8A8_SNORM, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8A8_SINT] = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8A8_SINT, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8A8_UINT] = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8A8_UINT, R32G32B32A32_FLOAT>::Load;
- table[R16G16_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, R16G16_UNORM, R32G32B32A32_FLOAT>::Load;
- table[R16G16_SNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, R16G16_SNORM, R32G32B32A32_FLOAT>::Load;
- table[R16G16_SINT] = LoadMacroTile<TilingTraits<TTileMode, 32>, R16G16_SINT, R32G32B32A32_FLOAT>::Load;
- table[R16G16_UINT] = LoadMacroTile<TilingTraits<TTileMode, 32>, R16G16_UINT, R32G32B32A32_FLOAT>::Load;
- table[R16G16_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 32>, R16G16_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[B10G10R10A2_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10A2_UNORM, R32G32B32A32_FLOAT>::Load;
- table[B10G10R10A2_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10A2_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[R11G11B10_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 32>, R11G11B10_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[R10G10B10_FLOAT_A2_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10_FLOAT_A2_UNORM, R32G32B32A32_FLOAT>::Load;
- table[R32_SINT] = LoadMacroTile<TilingTraits<TTileMode, 32>, R32_SINT, R32G32B32A32_FLOAT>::Load;
- table[R32_UINT] = LoadMacroTile<TilingTraits<TTileMode, 32>, R32_UINT, R32G32B32A32_FLOAT>::Load;
- table[R32_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 32>, R32_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[R24_UNORM_X8_TYPELESS] = LoadMacroTile<TilingTraits<TTileMode, 32>, R24_UNORM_X8_TYPELESS, R32G32B32A32_FLOAT>::Load;
- table[X24_TYPELESS_G8_UINT] = LoadMacroTile<TilingTraits<TTileMode, 32>, X24_TYPELESS_G8_UINT, R32G32B32A32_FLOAT>::Load;
- table[L32_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, L32_UNORM, R32G32B32A32_FLOAT>::Load;
- table[L16A16_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, L16A16_UNORM, R32G32B32A32_FLOAT>::Load;
- table[I24X8_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, I24X8_UNORM, R32G32B32A32_FLOAT>::Load;
- table[L24X8_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, L24X8_UNORM, R32G32B32A32_FLOAT>::Load;
- table[I32_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 32>, I32_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[L32_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 32>, L32_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[A32_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 32>, A32_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[B8G8R8X8_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, B8G8R8X8_UNORM, R32G32B32A32_FLOAT>::Load;
- table[B8G8R8X8_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 32>, B8G8R8X8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8X8_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8X8_UNORM, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8X8_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8X8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[R9G9B9E5_SHAREDEXP] = LoadMacroTile<TilingTraits<TTileMode, 32>, R9G9B9E5_SHAREDEXP, R32G32B32A32_FLOAT>::Load;
- table[B10G10R10X2_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10X2_UNORM, R32G32B32A32_FLOAT>::Load;
- table[L16A16_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 32>, L16A16_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[R10G10B10X2_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10X2_USCALED, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8A8_SSCALED] = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8A8_SSCALED, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8A8_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 32>, R8G8B8A8_USCALED, R32G32B32A32_FLOAT>::Load;
- table[R16G16_SSCALED] = LoadMacroTile<TilingTraits<TTileMode, 32>, R16G16_SSCALED, R32G32B32A32_FLOAT>::Load;
- table[R16G16_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 32>, R16G16_USCALED, R32G32B32A32_FLOAT>::Load;
- table[R32_SSCALED] = LoadMacroTile<TilingTraits<TTileMode, 32>, R32_SSCALED, R32G32B32A32_FLOAT>::Load;
- table[R32_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 32>, R32_USCALED, R32G32B32A32_FLOAT>::Load;
- table[B5G6R5_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, B5G6R5_UNORM, R32G32B32A32_FLOAT>::Load;
- table[B5G6R5_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 16>, B5G6R5_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[B5G5R5A1_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, B5G5R5A1_UNORM, R32G32B32A32_FLOAT>::Load;
- table[B5G5R5A1_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 16>, B5G5R5A1_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[B4G4R4A4_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, B4G4R4A4_UNORM, R32G32B32A32_FLOAT>::Load;
- table[B4G4R4A4_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 16>, B4G4R4A4_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[R8G8_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, R8G8_UNORM, R32G32B32A32_FLOAT>::Load;
- table[R8G8_SNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, R8G8_SNORM, R32G32B32A32_FLOAT>::Load;
- table[R8G8_SINT] = LoadMacroTile<TilingTraits<TTileMode, 16>, R8G8_SINT, R32G32B32A32_FLOAT>::Load;
- table[R8G8_UINT] = LoadMacroTile<TilingTraits<TTileMode, 16>, R8G8_UINT, R32G32B32A32_FLOAT>::Load;
- table[R16_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_UNORM, R32G32B32A32_FLOAT>::Load;
- table[R16_SNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_SNORM, R32G32B32A32_FLOAT>::Load;
- table[R16_SINT] = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_SINT, R32G32B32A32_FLOAT>::Load;
- table[R16_UINT] = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_UINT, R32G32B32A32_FLOAT>::Load;
- table[R16_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[I16_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, I16_UNORM, R32G32B32A32_FLOAT>::Load;
- table[L16_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, L16_UNORM, R32G32B32A32_FLOAT>::Load;
- table[A16_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, A16_UNORM, R32G32B32A32_FLOAT>::Load;
- table[L8A8_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, L8A8_UNORM, R32G32B32A32_FLOAT>::Load;
- table[I16_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 16>, I16_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[L16_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 16>, L16_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[A16_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 16>, A16_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[L8A8_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 16>, L8A8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[B5G5R5X1_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, B5G5R5X1_UNORM, R32G32B32A32_FLOAT>::Load;
- table[B5G5R5X1_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 16>, B5G5R5X1_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[R8G8_SSCALED] = LoadMacroTile<TilingTraits<TTileMode, 16>, R8G8_SSCALED, R32G32B32A32_FLOAT>::Load;
- table[R8G8_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 16>, R8G8_USCALED, R32G32B32A32_FLOAT>::Load;
- table[R16_SSCALED] = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_SSCALED, R32G32B32A32_FLOAT>::Load;
- table[R16_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_USCALED, R32G32B32A32_FLOAT>::Load;
- table[A1B5G5R5_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, A1B5G5R5_UNORM, R32G32B32A32_FLOAT>::Load;
- table[A4B4G4R4_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, A4B4G4R4_UNORM, R32G32B32A32_FLOAT>::Load;
- table[L8A8_UINT] = LoadMacroTile<TilingTraits<TTileMode, 16>, L8A8_UINT, R32G32B32A32_FLOAT>::Load;
- table[L8A8_SINT] = LoadMacroTile<TilingTraits<TTileMode, 16>, L8A8_SINT, R32G32B32A32_FLOAT>::Load;
- table[R8_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 8>, R8_UNORM, R32G32B32A32_FLOAT>::Load;
- table[R8_SNORM] = LoadMacroTile<TilingTraits<TTileMode, 8>, R8_SNORM, R32G32B32A32_FLOAT>::Load;
- table[R8_SINT] = LoadMacroTile<TilingTraits<TTileMode, 8>, R8_SINT, R32G32B32A32_FLOAT>::Load;
- table[R8_UINT] = LoadMacroTile<TilingTraits<TTileMode, 8>, R8_UINT, R32G32B32A32_FLOAT>::Load;
- table[A8_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 8>, A8_UNORM, R32G32B32A32_FLOAT>::Load;
- table[I8_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 8>, I8_UNORM, R32G32B32A32_FLOAT>::Load;
- table[L8_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 8>, L8_UNORM, R32G32B32A32_FLOAT>::Load;
- table[R8_SSCALED] = LoadMacroTile<TilingTraits<TTileMode, 8>, R8_SSCALED, R32G32B32A32_FLOAT>::Load;
- table[R8_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 8>, R8_USCALED, R32G32B32A32_FLOAT>::Load;
- table[L8_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 8>, L8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[L8_UINT] = LoadMacroTile<TilingTraits<TTileMode, 8>, L8_UINT, R32G32B32A32_FLOAT>::Load;
- table[L8_SINT] = LoadMacroTile<TilingTraits<TTileMode, 8>, L8_SINT, R32G32B32A32_FLOAT>::Load;
- table[I8_UINT] = LoadMacroTile<TilingTraits<TTileMode, 8>, I8_UINT, R32G32B32A32_FLOAT>::Load;
- table[I8_SINT] = LoadMacroTile<TilingTraits<TTileMode, 8>, I8_SINT, R32G32B32A32_FLOAT>::Load;
- table[YCRCB_SWAPUVY] = LoadMacroTile<TilingTraits<TTileMode, 32>, YCRCB_SWAPUVY, R32G32B32A32_FLOAT>::Load;
- table[BC1_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 64>, BC1_UNORM, R32G32B32A32_FLOAT>::Load;
- table[BC2_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 128>, BC2_UNORM, R32G32B32A32_FLOAT>::Load;
- table[BC3_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 128>, BC3_UNORM, R32G32B32A32_FLOAT>::Load;
- table[BC4_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 64>, BC4_UNORM, R32G32B32A32_FLOAT>::Load;
- table[BC5_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 128>, BC5_UNORM, R32G32B32A32_FLOAT>::Load;
- table[BC1_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 64>, BC1_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[BC2_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 128>, BC2_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[BC3_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 128>, BC3_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[YCRCB_SWAPUV] = LoadMacroTile<TilingTraits<TTileMode, 32>, YCRCB_SWAPUV, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 24>, R8G8B8_UNORM, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8_SNORM] = LoadMacroTile<TilingTraits<TTileMode, 24>, R8G8B8_SNORM, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8_SSCALED] = LoadMacroTile<TilingTraits<TTileMode, 24>, R8G8B8_SSCALED, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 24>, R8G8B8_USCALED, R32G32B32A32_FLOAT>::Load;
- table[BC4_SNORM] = LoadMacroTile<TilingTraits<TTileMode, 64>, BC4_SNORM, R32G32B32A32_FLOAT>::Load;
- table[BC5_SNORM] = LoadMacroTile<TilingTraits<TTileMode, 128>, BC5_SNORM, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 48>, R16G16B16_FLOAT, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 48>, R16G16B16_UNORM, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16_SNORM] = LoadMacroTile<TilingTraits<TTileMode, 48>, R16G16B16_SNORM, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16_SSCALED] = LoadMacroTile<TilingTraits<TTileMode, 48>, R16G16B16_SSCALED, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 48>, R16G16B16_USCALED, R32G32B32A32_FLOAT>::Load;
- table[BC6H_SF16] = LoadMacroTile<TilingTraits<TTileMode, 128>, BC6H_SF16, R32G32B32A32_FLOAT>::Load;
- table[BC7_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 128>, BC7_UNORM, R32G32B32A32_FLOAT>::Load;
- table[BC7_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 128>, BC7_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[BC6H_UF16] = LoadMacroTile<TilingTraits<TTileMode, 128>, BC6H_UF16, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8_UNORM_SRGB] = LoadMacroTile<TilingTraits<TTileMode, 24>, R8G8B8_UNORM_SRGB, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16_UINT] = LoadMacroTile<TilingTraits<TTileMode, 48>, R16G16B16_UINT, R32G32B32A32_FLOAT>::Load;
- table[R16G16B16_SINT] = LoadMacroTile<TilingTraits<TTileMode, 48>, R16G16B16_SINT, R32G32B32A32_FLOAT>::Load;
- table[R10G10B10A2_SNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10A2_SNORM, R32G32B32A32_FLOAT>::Load;
- table[R10G10B10A2_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10A2_USCALED, R32G32B32A32_FLOAT>::Load;
- table[R10G10B10A2_SSCALED] = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10A2_SSCALED, R32G32B32A32_FLOAT>::Load;
- table[R10G10B10A2_SINT] = LoadMacroTile<TilingTraits<TTileMode, 32>, R10G10B10A2_SINT, R32G32B32A32_FLOAT>::Load;
- table[B10G10R10A2_SNORM] = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10A2_SNORM, R32G32B32A32_FLOAT>::Load;
- table[B10G10R10A2_USCALED] = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10A2_USCALED, R32G32B32A32_FLOAT>::Load;
- table[B10G10R10A2_SSCALED] = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10A2_SSCALED, R32G32B32A32_FLOAT>::Load;
- table[B10G10R10A2_UINT] = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10A2_UINT, R32G32B32A32_FLOAT>::Load;
- table[B10G10R10A2_SINT] = LoadMacroTile<TilingTraits<TTileMode, 32>, B10G10R10A2_SINT, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8_UINT] = LoadMacroTile<TilingTraits<TTileMode, 24>, R8G8B8_UINT, R32G32B32A32_FLOAT>::Load;
- table[R8G8B8_SINT] = LoadMacroTile<TilingTraits<TTileMode, 24>, R8G8B8_SINT, R32G32B32A32_FLOAT>::Load;
- table[RAW] = LoadMacroTile<TilingTraits<TTileMode, 8>, RAW, R32G32B32A32_FLOAT>::Load;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// InitLoadTileDepthTable - Helper function for setting up the depth load-tile table for tile mode TTileMode.
-template<SWR_TILE_MODE TTileMode>
-static INLINE void InitLoadTileDepthTable(PFN_LOAD_TILES(&table)[NUM_SWR_FORMATS])
-{
- memset(table, 0, sizeof(table)); // table is an array reference, so sizeof covers all NUM_SWR_FORMATS entries
-
- table[R32_FLOAT] = LoadMacroTile<TilingTraits<TTileMode, 32>, R32_FLOAT, R32_FLOAT>::Load; // only these four formats get a loader; all other entries stay null
- table[R32_FLOAT_X8X24_TYPELESS] = LoadMacroTile<TilingTraits<TTileMode, 64>, R32_FLOAT_X8X24_TYPELESS, R32_FLOAT>::Load;
- table[R24_UNORM_X8_TYPELESS] = LoadMacroTile<TilingTraits<TTileMode, 32>, R24_UNORM_X8_TYPELESS, R32_FLOAT>::Load;
- table[R16_UNORM] = LoadMacroTile<TilingTraits<TTileMode, 16>, R16_UNORM, R32_FLOAT>::Load;
-}
-
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Loads a full hottile from a render surface
-/// @param hWorkerPrivateData - Handle to private DC
-/// @param pSrcSurface, pBucketMgr - Source surface state and bucket manager (NOTE(review): exact use not visible in this header excerpt)
-/// @param dstFormat, renderTargetIndex - Format for hot tile; index to src render target
-/// @param x, y, renderTargetArrayIndex - Coordinates to raster tile; array index (presumably the array slice to load from - confirm against the definition)
-/// @param pDstHotTile - Pointer to Hot Tile
-void SwrLoadHotTile(
- HANDLE hWorkerPrivateData,
- const SWR_SURFACE_STATE *pSrcSurface,
- BucketManager* pBucketMgr,
- SWR_FORMAT dstFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex,
- uint8_t *pDstHotTile);
diff --git a/src/gallium/drivers/swr/rasterizer/memory/LoadTile_Linear.cpp b/src/gallium/drivers/swr/rasterizer/memory/LoadTile_Linear.cpp
deleted file mode 100644
index 5f53b5b6b56..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/LoadTile_Linear.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file LoadTile_Linear.cpp
-*
-* @brief Functionality for Load
-*
-******************************************************************************/
-#include "LoadTile.h"
-
-PFN_LOAD_TILES sLoadTilesColorTable_SWR_TILE_NONE[NUM_SWR_FORMATS]; // color load functions for SWR_TILE_NONE, one slot per format
-PFN_LOAD_TILES sLoadTilesDepthTable_SWR_TILE_NONE[NUM_SWR_FORMATS]; // depth load functions for SWR_TILE_NONE, one slot per format
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Sets up the SWR_TILE_NONE (linear) color and depth tables for LoadTile
-void InitLoadTilesTable_Linear()
-{
- InitLoadTileColorTable<SWR_TILE_NONE>(sLoadTilesColorTable_SWR_TILE_NONE);
- InitLoadTileDepthTable<SWR_TILE_NONE>(sLoadTilesDepthTable_SWR_TILE_NONE);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/LoadTile_TileX.cpp b/src/gallium/drivers/swr/rasterizer/memory/LoadTile_TileX.cpp
deleted file mode 100644
index 8e76655ff11..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/LoadTile_TileX.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file LoadTile_TileX.cpp
-*
-* @brief Functionality for Load
-*
-******************************************************************************/
-#include "LoadTile.h"
-
-PFN_LOAD_TILES sLoadTilesColorTable_SWR_TILE_MODE_XMAJOR[NUM_SWR_FORMATS]; // color load functions for SWR_TILE_MODE_XMAJOR, one slot per format
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Sets up the SWR_TILE_MODE_XMAJOR color table for LoadTile (this file defines no depth table for X-major)
-void InitLoadTilesTable_XMajor()
-{
- InitLoadTileColorTable<SWR_TILE_MODE_XMAJOR>(sLoadTilesColorTable_SWR_TILE_MODE_XMAJOR);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/LoadTile_TileY.cpp b/src/gallium/drivers/swr/rasterizer/memory/LoadTile_TileY.cpp
deleted file mode 100644
index c136392eb78..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/LoadTile_TileY.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file LoadTile_TileY.cpp
-*
-* @brief Functionality for Load
-*
-******************************************************************************/
-#include "LoadTile.h"
-
-PFN_LOAD_TILES sLoadTilesColorTable_SWR_TILE_MODE_YMAJOR[NUM_SWR_FORMATS]; // color load functions for SWR_TILE_MODE_YMAJOR, one slot per format
-PFN_LOAD_TILES sLoadTilesDepthTable_SWR_TILE_MODE_YMAJOR[NUM_SWR_FORMATS]; // depth load functions for SWR_TILE_MODE_YMAJOR, one slot per format
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Sets up the SWR_TILE_MODE_YMAJOR color and depth tables for LoadTile
-void InitLoadTilesTable_YMajor()
-{
- InitLoadTileColorTable<SWR_TILE_MODE_YMAJOR>(sLoadTilesColorTable_SWR_TILE_MODE_YMAJOR);
- InitLoadTileDepthTable<SWR_TILE_MODE_YMAJOR>(sLoadTilesDepthTable_SWR_TILE_MODE_YMAJOR);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp
deleted file mode 100644
index 9fee13a045a..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file StoreTile.cpp
-*
-* @brief Functionality for Store.
-*
-******************************************************************************/
-#include "StoreTile.h"
-//////////////////////////////////////////////////////////////////////////
-/// Store Raster Tile Function Tables, indexed as [tile mode][surface format]; a null entry means the combination is unsupported.
-//////////////////////////////////////////////////////////////////////////
-PFN_STORE_TILES sStoreTilesTableColor[SWR_TILE_MODE_COUNT][NUM_SWR_FORMATS] = {};
-PFN_STORE_TILES sStoreTilesTableDepth[SWR_TILE_MODE_COUNT][NUM_SWR_FORMATS] = {};
-PFN_STORE_TILES sStoreTilesTableStencil[SWR_TILE_MODE_COUNT][NUM_SWR_FORMATS] = {};
-
-// on demand buckets for store tiles: one bucket id per format, -1 until a bucket is registered for that format
-static std::mutex sBucketMutex; // guards updates to sBuckets
-static std::vector<int32_t> sBuckets(NUM_SWR_FORMATS, -1);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Deswizzles and stores a full hottile to a render surface
-/// @param hWorkerPrivateData - Handle to worker private data (not read by this function)
-/// @param pDstSurface - Destination surface state; nothing is stored when its type is SURFACE_NULL
-/// @param pBucketMgr - Bucket manager; only used when KNOB_ENABLE_RDTSC is defined
-/// @param srcFormat - Format for hot tile (not read by this function)
-/// @param renderTargetIndex - Index to destination render target
-/// @param x, y - Coordinates to raster tile.
-/// @param renderTargetArrayIndex - Array index to store to; forced to 0 when >= pDstSurface->depth
-/// @param pSrcHotTile - Pointer to Hot Tile
-void SwrStoreHotTileToSurface(
-    HANDLE hWorkerPrivateData,
-    SWR_SURFACE_STATE *pDstSurface,
-    BucketManager* pBucketMgr,
-    SWR_FORMAT srcFormat,
-    SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
-    uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex,
-    uint8_t *pSrcHotTile)
-{
-    if (pDstSurface->type == SURFACE_NULL)
-    {
-        return;
-    }
-
-    // force 0 if requested renderTargetArrayIndex is OOB
-    if (renderTargetArrayIndex >= pDstSurface->depth)
-    {
-        renderTargetArrayIndex = 0;
-    }
-
-    PFN_STORE_TILES pfnStoreTiles = nullptr;
-    if (renderTargetIndex <= SWR_ATTACHMENT_COLOR7)
-    {
-        pfnStoreTiles = sStoreTilesTableColor[pDstSurface->tileMode][pDstSurface->format];
-    }
-    else if (renderTargetIndex == SWR_ATTACHMENT_DEPTH)
-    {
-        pfnStoreTiles = sStoreTilesTableDepth[pDstSurface->tileMode][pDstSurface->format];
-    }
-    else
-    {
-        pfnStoreTiles = sStoreTilesTableStencil[pDstSurface->tileMode][pDstSurface->format];
-    }
-
-    if(nullptr == pfnStoreTiles)
-    {
-        SWR_INVALID("Invalid pixel format / tile mode for store tiles");
-        return;
-    }
-
-    // Store a macro tile
-#ifdef KNOB_ENABLE_RDTSC
-    int32_t bucketId = -1;
-    {
-        // storetiles is called by multiple threads: read and update sBuckets only
-        // under the mutex (the guard also unlocks if RegisterBucket throws)
-        std::lock_guard<std::mutex> lock(sBucketMutex);
-        int32_t& slot = sBuckets[pDstSurface->format];
-        if (slot == -1)
-        {
-            const SWR_FORMAT_INFO& info = GetFormatInfo(pDstSurface->format);
-            BUCKET_DESC desc{info.name, "", false, 0xffffffff};
-            slot = pBucketMgr->RegisterBucket(desc);
-        }
-        bucketId = slot;
-    }
-    pBucketMgr->StartBucket(bucketId);
-#endif
-    pfnStoreTiles(pSrcHotTile, pDstSurface, x, y, renderTargetArrayIndex);
-#ifdef KNOB_ENABLE_RDTSC
-    pBucketMgr->StopBucket(bucketId);
-#endif
-}
-
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Sets up tables for StoreTile: clears the color, depth and stencil tables, then calls each InitStoreTilesTable_* initializer
-void InitSimStoreTilesTable()
-{
-    memset(sStoreTilesTableColor, 0, sizeof(sStoreTilesTableColor));
-    memset(sStoreTilesTableDepth, 0, sizeof(sStoreTilesTableDepth));
-    memset(sStoreTilesTableStencil, 0, sizeof(sStoreTilesTableStencil)); // stencil table is cleared like the other two
-    InitStoreTilesTable_Linear_1();
-    InitStoreTilesTable_Linear_2();
-    InitStoreTilesTable_TileX_1();
-    InitStoreTilesTable_TileX_2();
-    InitStoreTilesTable_TileY_1();
-    InitStoreTilesTable_TileY_2();
-    InitStoreTilesTable_TileW();
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
deleted file mode 100644
index 1b7698cc5b8..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
+++ /dev/null
@@ -1,2051 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file StoreTile.h
-*
-* @brief Functionality for Store.
-*
-******************************************************************************/
-#pragma once
-
-#include "common/os.h"
-#include "common/formats.h"
-#include "core/context.h"
-#include "core/rdtsc_core.h"
-#include "core/format_conversion.h"
-
-#include "memory/TilingFunctions.h"
-#include "memory/Convert.h"
-#include "memory/SurfaceState.h"
-#include "core/multisample.h"
-
-#include <array>
-#include <sstream>
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-// Signature shared by the color, depth and stencil store functions; the dispatcher calls it as (pSrcHotTile, pDstSurface, x, y, renderTargetArrayIndex).
-typedef void(*PFN_STORE_TILES)(uint8_t*, SWR_SURFACE_STATE*, uint32_t, uint32_t, uint32_t);
-
-//////////////////////////////////////////////////////////////////////////
-/// Store Raster Tile Function Tables, indexed as [tileMode][format]; a null entry is reported as an invalid format / tile mode.
-//////////////////////////////////////////////////////////////////////////
-extern PFN_STORE_TILES sStoreTilesTableColor[SWR_TILE_MODE_COUNT][NUM_SWR_FORMATS];
-extern PFN_STORE_TILES sStoreTilesTableDepth[SWR_TILE_MODE_COUNT][NUM_SWR_FORMATS];
-extern PFN_STORE_TILES sStoreTilesTableStencil[SWR_TILE_MODE_COUNT][NUM_SWR_FORMATS];
-
-void InitStoreTilesTable_Linear_1();
-void InitStoreTilesTable_Linear_2();
-void InitStoreTilesTable_TileX_1();
-void InitStoreTilesTable_TileX_2();
-void InitStoreTilesTable_TileY_1();
-void InitStoreTilesTable_TileY_2();
-void InitStoreTilesTable_TileW();
-void InitStoreTilesTable(); // NOTE(review): the setup routine visible in StoreTile.cpp is InitSimStoreTilesTable - confirm this prototype has a definition
-
-//////////////////////////////////////////////////////////////////////////
-/// StorePixels
-/// @brief Stores a 4x2 (AVX) raster-tile to two rows.
-/// @param pSrc - Pointer to source raster tile in SWRZ pixel order
-/// @param ppDsts - Array of destination pointers. Each pointer is
-/// to a single row of at most 16B.
-/// @tparam NumDests - Number of destination pointers. Each pair of
-/// pointers is for a 16-byte column of two rows.
-//////////////////////////////////////////////////////////////////////////
-template <size_t PixelSize, size_t NumDests>
-struct StorePixels
-{
- static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests]) = delete; // deleted: only explicit <PixelSize, NumDests> specializations provide a usable Store
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// StorePixels (8-bit pixel specialization)
-/// @brief Stores a 4x2 (AVX) raster-tile to two rows.
-/// @param pSrc - Pointer to source raster tile in SWRZ pixel order
-/// @param ppDsts - Array of destination pointers. Each pointer is
-/// to a single row of at most 16B.
-/// @tparam NumDests - Number of destination pointers. Each pair of
-/// pointers is for a 16-byte column of two rows.
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct StorePixels<8, 2>
-{
-    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[2])
-    {
-        // The 8 source bytes are handled as four 16-bit pixel pairs.
-        const uint16_t* pairs = reinterpret_cast<const uint16_t*>(pSrc);
-        uint16_t* dstA = reinterpret_cast<uint16_t*>(ppDsts[0]);
-        uint16_t* dstB = reinterpret_cast<uint16_t*>(ppDsts[1]);
-
-        // Undo SWR-Z order: even pairs go to ppDsts[0], odd pairs to ppDsts[1].
-        dstA[0] = pairs[0];
-        dstA[1] = pairs[2];
-
-        dstB[0] = pairs[1];
-        dstB[1] = pairs[3];
-    }
-};
-
-template <>
-struct StorePixels<8, 4>
-{
-    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[4])
-    {
-        // 16 one-byte pixels, handled as eight 16-bit pixel pairs.
-        const uint16_t* pairs = reinterpret_cast<const uint16_t*>(pSrc);
-        // Four destination pointers, each receiving two pixel pairs.
-        uint16_t** dsts = reinterpret_cast<uint16_t**>(ppDsts);
-
-        // Undo SWR-Z order: every group of four source pairs feeds two
-        // destinations, even pairs to the first and odd pairs to the second.
-        for (uint32_t group = 0; group < 2; ++group)
-        {
-            const uint16_t* quad = pairs + 4 * group;
-            uint16_t* dstA = dsts[2 * group + 0];
-            uint16_t* dstB = dsts[2 * group + 1];
-            dstA[0] = quad[0];
-            dstA[1] = quad[2];
-            dstB[0] = quad[1];
-            dstB[1] = quad[3];
-        }
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// StorePixels (16-bit pixel specialization)
-/// @brief Stores a 4x2 (AVX) raster-tile to two rows.
-/// @param pSrc - Pointer to source raster tile in SWRZ pixel order
-/// @param ppDsts - Array of destination pointers. Each pointer is
-/// to a single row of at most 16B.
-/// @tparam NumDests - Number of destination pointers. Each pair of
-/// pointers is for a 16-byte column of two rows.
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct StorePixels<16, 2>
-{
-    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[2])
-    {
-        // The 16 source bytes are handled as four 32-bit pixel pairs.
-        const uint32_t* pairs = reinterpret_cast<const uint32_t*>(pSrc);
-        uint32_t* dstA = reinterpret_cast<uint32_t*>(ppDsts[0]);
-        uint32_t* dstB = reinterpret_cast<uint32_t*>(ppDsts[1]);
-
-        // Undo SWR-Z order: even pairs go to ppDsts[0], odd pairs to ppDsts[1].
-        dstA[0] = pairs[0];
-        dstA[1] = pairs[2];
-
-        dstB[0] = pairs[1];
-        dstB[1] = pairs[3];
-    }
-};
-
-template <>
-struct StorePixels<16, 4>
-{
-    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[4])
-    {
-        // 16 two-byte pixels, handled as eight 32-bit pixel pairs.
-        const uint32_t* pairs = reinterpret_cast<const uint32_t*>(pSrc);
-        // Four destination pointers, each receiving two pixel pairs.
-        uint32_t** dsts = reinterpret_cast<uint32_t**>(ppDsts);
-
-        // Undo SWR-Z order: every group of four source pairs feeds two
-        // destinations, even pairs to the first and odd pairs to the second.
-        for (uint32_t group = 0; group < 2; ++group)
-        {
-            const uint32_t* quad = pairs + 4 * group;
-            uint32_t* dstA = dsts[2 * group + 0];
-            uint32_t* dstB = dsts[2 * group + 1];
-            dstA[0] = quad[0];
-            dstA[1] = quad[2];
-            dstB[0] = quad[1];
-            dstB[1] = quad[3];
-        }
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// StorePixels (32-bit pixel specialization)
-/// @brief Stores a 4x2 (AVX) raster-tile to two rows.
-/// @param pSrc - Pointer to source raster tile in SWRZ pixel order
-/// @param ppDsts - Array of destination pointers. Each pointer is
-/// to a single row of at most 16B.
-/// @tparam NumDests - Number of destination pointers. Each pair of
-/// pointers is for a 16-byte column of two rows.
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct StorePixels<32, 2>
-{
-    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[2])
-    {
-        // The source holds two 128-bit quads in SWR-Z order.
-        const simd4scalari* pQuads = reinterpret_cast<const simd4scalari*>(pSrc);
-        const simd4scalari quadA = SIMD128::load_si(&pQuads[0]);
-        const simd4scalari quadB = SIMD128::load_si(&pQuads[1]);
-
-        // The low 64-bit halves of both quads are written to ppDsts[0],
-        // the high 64-bit halves to ppDsts[1]; stores are unaligned.
-        const simd4scalari lowHalves = SIMD128::unpacklo_epi64(quadA, quadB);
-        const simd4scalari highHalves = SIMD128::unpackhi_epi64(quadA, quadB);
-        SIMD128::storeu_si(reinterpret_cast<simd4scalari*>(ppDsts[0]), lowHalves); SIMD128::storeu_si(reinterpret_cast<simd4scalari*>(ppDsts[1]), highHalves);
-    }
-};
-
-template <>
-struct StorePixels<32, 4>
-{
-    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[4])
-    {
-        // 16 four-byte pixels arrive as four 128-bit quads.
-        const simd4scalari* pQuads = reinterpret_cast<const simd4scalari*>(pSrc);
-        simd4scalari** ppOut = reinterpret_cast<simd4scalari**>(ppDsts);
-
-        // Load every quad before any store is issued.
-        const simd4scalari q0 = SIMD128::load_si(pQuads + 0); // pixels 0 1 2 3
-        const simd4scalari q1 = SIMD128::load_si(pQuads + 1); // pixels 4 5 6 7
-        const simd4scalari q2 = SIMD128::load_si(pQuads + 2); // pixels 8 9 A B
-        const simd4scalari q3 = SIMD128::load_si(pQuads + 3); // pixels C D E F
-
-        // Undo SWR-Z order by pairing 64-bit halves of neighbouring quads.
-        const simd4scalari out0 = SIMD128::unpacklo_epi64(q0, q1); // 0 1 4 5
-        const simd4scalari out1 = SIMD128::unpackhi_epi64(q0, q1); // 2 3 6 7
-        const simd4scalari out2 = SIMD128::unpacklo_epi64(q2, q3); // 8 9 C D
-        const simd4scalari out3 = SIMD128::unpackhi_epi64(q2, q3); // A B E F
-        SIMD128::storeu_si(ppOut[0], out0); SIMD128::storeu_si(ppOut[1], out1); SIMD128::storeu_si(ppOut[2], out2); SIMD128::storeu_si(ppOut[3], out3);
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// StorePixels (64-bit pixel specialization)
-/// @brief Stores a 4x2 (AVX) raster-tile to two rows.
-/// @param pSrc - Pointer to source raster tile in SWRZ pixel order
-/// @param ppDsts - Array of destination pointers. Each pointer is
-/// to a single row of at most 16B.
-/// @tparam NumDests - Number of destination pointers. Each pair of
-/// pointers is for a 16-byte column of two rows.
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct StorePixels<64, 4>
-{
-    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[4])
-    {
-        // The source is read as four consecutive 128-bit chunks.
-        const simd4scalari* pChunks = reinterpret_cast<const simd4scalari*>(pSrc);
-        simd4scalari** ppOut = reinterpret_cast<simd4scalari**>(&ppDsts[0]);
-        // The destination pointers already follow SWR-Z order, so chunk i
-        // is copied to destination i without any reordering.
-        for (uint32_t i = 0; i < 4; ++i)
-        {
-            *ppOut[i] = pChunks[i];
-        }
-    }
-};
-
-template <>
-struct StorePixels<64, 8>
-{
-    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[8])
-    {
-        // 16 eight-byte pixels: eight 128-bit chunks of two pixels each
-        // (128 bytes in total).
-        const simd4scalari* pChunks = reinterpret_cast<const simd4scalari*>(pSrc);
-
-        // One destination pointer per chunk.
-        simd4scalari** ppOut = reinterpret_cast<simd4scalari**>(ppDsts);
-
-        // The destination pointers already follow SWR-Z order, so chunk i
-        // (pixels 2i and 2i+1) is copied to destination i without any
-        // reordering.
-        for (uint32_t i = 0; i < 8; ++i)
-        {
-            *ppOut[i] = pChunks[i];
-        }
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// StorePixels (128-bit pixel specialization)
-/// @brief Stores a 4x2 (AVX) raster-tile to two rows.
-/// @param pSrc - Pointer to source raster tile in SWRZ pixel order
-/// @param ppDsts - Array of destination pointers. Each pointer is
-/// to a single row of at most 16B.
-/// @tparam NumDests - Number of destination pointers. Each pair of
-/// pointers is for a 16-byte column of two rows.
-//////////////////////////////////////////////////////////////////////////
-template <>
-struct StorePixels<128, 8>
-{
-    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[8])
-    {
-        // The source is read as eight consecutive 128-bit chunks.
-        const simd4scalari* pChunks = reinterpret_cast<const simd4scalari*>(pSrc);
-        simd4scalari** ppOut = reinterpret_cast<simd4scalari**>(&ppDsts[0]);
-
-        // Undo SWR-Z order: within each group of four chunks the two
-        // middle chunks trade places on the way to the destinations.
-        const uint32_t srcChunk[8] = {0, 2, 1, 3, 4, 6, 5, 7};
-
-        // Destination d receives source chunk srcChunk[d].
-        for (uint32_t d = 0; d < 8; ++d)
-        {
-            *ppOut[d] = pChunks[srcChunk[d]];
-        }
-    }
-};
-
-template <>
-struct StorePixels<128, 16>
-{
-    static void Store(const uint8_t* pSrc, uint8_t* (&ppDsts)[16])
-    {
-        // 16 sixteen-byte pixels: sixteen 128-bit chunks (256 bytes).
-        const simd4scalari* pChunks = reinterpret_cast<const simd4scalari*>(pSrc);
-        simd4scalari** ppOut = reinterpret_cast<simd4scalari**>(ppDsts);
-
-        // Within each group of four chunks the two middle chunks trade places.
-        const uint32_t laneInGroup[4] = {0, 2, 1, 3};
-
-        for (uint32_t d = 0; d < 16; ++d)
-        {
-            const uint32_t groupBase = d & ~3u;
-            *ppOut[d] = pChunks[groupBase + laneInGroup[d & 3u]];
-        }
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// ConvertPixelsSOAtoAOS - Conversion for SIMD pixel (4x2 or 2x2)
-//////////////////////////////////////////////////////////////////////////
-template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct ConvertPixelsSOAtoAOS
-{
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Converts one SIMD16 of pixels from SrcFormat to DstFormat,
-    ///        transposes the result from SOA to AOS, and stores it.
-    /// @param pSrc   - Pointer to the source pixels in SOA layout.
-    /// @param ppDsts - Destination pointers, forwarded to StorePixels.
-    template <size_t NumDests>
-    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
-    {
-        static const uint32_t kScratchBytes = 16 * 16; // 16 pixels * 16 bytes per pixel
-
-        OSALIGNSIMD16(uint8_t) convertedSoa[kScratchBytes] = {0};
-        OSALIGNSIMD16(uint8_t) transposedAos[kScratchBytes] = {0};
-
-        // Step 1: SrcFormat --> DstFormat, still in SOA layout
-        simd16vector pixels;
-        LoadSOA<SrcFormat>(pSrc, pixels);
-        StoreSOA<DstFormat>(pixels, convertedSoa);
-
-        // Step 2: SOA --> AOS transpose into the second scratch buffer
-        FormatTraits<DstFormat>::TransposeT::Transpose_simd16(convertedSoa, transposedAos);
-
-        // Step 3: hand the AOS pixels to the StorePixels specialization
-        StorePixels<FormatTraits<DstFormat>::bpp, NumDests>::Store(transposedAos, ppDsts);
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// ConvertPixelsSOAtoAOS - Conversion for SIMD pixel (4x2 or 2x2)
-/// Specialization for no format conversion
-//////////////////////////////////////////////////////////////////////////
-template<SWR_FORMAT Format>
-struct ConvertPixelsSOAtoAOS<Format, Format>
-{
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Source and destination formats match, so the pixels are only
-    ///        transposed from SOA to AOS and then stored.
-    /// @param pSrc   - Pointer to the source pixels in SOA layout.
-    /// @param ppDsts - Destination pointers, forwarded to StorePixels.
-    template <size_t NumDests>
-    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
-    {
-        static const uint32_t kScratchBytes = 16 * 16; // 16 pixels * 16 bytes per pixel
-
-        OSALIGNSIMD16(uint8_t) transposedAos[kScratchBytes];
-
-        // SOA --> AOS transpose straight from the source pixels
-        FormatTraits<Format>::TransposeT::Transpose_simd16(pSrc, transposedAos);
-
-        // Hand the AOS pixels to the StorePixels specialization
-        StorePixels<FormatTraits<Format>::bpp, NumDests>::Store(transposedAos, ppDsts);
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// ConvertPixelsSOAtoAOS - Specialization conversion for B5G6R5_UNORM
-//////////////////////////////////////////////////////////////////////////
-template<>
-struct ConvertPixelsSOAtoAOS < R32G32B32A32_FLOAT, B5G6R5_UNORM >
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Converts a SIMD16 of R32G32B32A32_FLOAT pixels to B5G6R5_UNORM,
- /// packs them to 16-bit AOS pixels and stores them through StorePixels.
- /// @param pSrc - Pointer to raster tile.
- /// @param ppDsts - Array of NumDests destination pointers, forwarded to StorePixels.
- template <size_t NumDests>
- INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
- {
- static const SWR_FORMAT SrcFormat = R32G32B32A32_FLOAT;
- static const SWR_FORMAT DstFormat = B5G6R5_UNORM;
-
- static const uint32_t MAX_RASTER_TILE_BYTES = 16 * 16; // 16 pixels * 16 bytes per pixel
-
- OSALIGNSIMD16(uint8_t) aosTile[MAX_RASTER_TILE_BYTES];
-
- // Load hot-tile
- simd16vector src, dst;
- LoadSOA<SrcFormat>(pSrc, src);
-
- // deswizzle: pick the source component for each of the three destination components
- dst.x = src[FormatTraits<DstFormat>::swizzle(0)];
- dst.y = src[FormatTraits<DstFormat>::swizzle(1)];
- dst.z = src[FormatTraits<DstFormat>::swizzle(2)];
-
- // clamp
- dst.x = Clamp<DstFormat>(dst.x, 0);
- dst.y = Clamp<DstFormat>(dst.y, 1);
- dst.z = Clamp<DstFormat>(dst.z, 2);
-
- // normalize
- dst.x = Normalize<DstFormat>(dst.x, 0);
- dst.y = Normalize<DstFormat>(dst.y, 1);
- dst.z = Normalize<DstFormat>(dst.z, 2);
-
- // pack: component 0 starts at bit 0 (NOTE(review): assumes Normalize leaves integer bit patterns in the lanes - confirm)
- simd16scalari packed = _simd16_castps_si(dst.x);
-
- SWR_ASSERT(FormatTraits<DstFormat>::GetBPC(0) == 5);
- SWR_ASSERT(FormatTraits<DstFormat>::GetBPC(1) == 6);
-
- packed = _simd16_or_si(packed, _simd16_slli_epi32(_simd16_castps_si(dst.y), 5)); // component 1 sits above the 5 bits of component 0
- packed = _simd16_or_si(packed, _simd16_slli_epi32(_simd16_castps_si(dst.z), 5 + 6)); // component 2 sits above the 5 + 6 bits of components 0 and 1
-
- // copy the low 16 bits of each 32 bit lane into consecutive 16-bit pixels at the start of aosTile
- uint32_t *pPacked = (uint32_t*)&packed;
- uint16_t *pAosTile = (uint16_t*)&aosTile[0];
- for (uint32_t t = 0; t < KNOB_SIMD16_WIDTH; ++t)
- {
- *pAosTile++ = *pPacked++; // uint32_t -> uint16_t assignment drops the upper 16 bits
- }
-
- // Store data into destination
- StorePixels<FormatTraits<DstFormat>::bpp, NumDests>::Store(aosTile, ppDsts);
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// ConvertPixelsSOAtoAOS - Conversion for SIMD pixel (4x2 or 2x2)
-//////////////////////////////////////////////////////////////////////////
-template<>
-struct ConvertPixelsSOAtoAOS<R32_FLOAT, R24_UNORM_X8_TYPELESS>
-{
- static const SWR_FORMAT SrcFormat = R32_FLOAT;
- static const SWR_FORMAT DstFormat = R24_UNORM_X8_TYPELESS;
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Converts a SIMD16 of R32_FLOAT values to 24-bit UNORM and merges
- /// them into the destination, leaving the upper 8 bits of each 32-bit pixel as they were.
- /// @param pSrc - Pointer to raster tile.
- /// @param ppDsts - Destination pointers; ppDsts[0] through ppDsts[3] are both read and written.
- template <size_t NumDests>
- INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
- {
- simd16scalar comp = _simd16_load_ps(reinterpret_cast<const float *>(pSrc));
-
- // clamp to [0.0f, 1.0f]
- const simd16scalar zero = _simd16_setzero_ps();
- const simd16scalar ones = _simd16_set1_ps(1.0f);
-
- comp = _simd16_max_ps(comp, zero);
- comp = _simd16_min_ps(comp, ones);
-
- // normalize: scale by the destination format's fromFloat(0) factor
- comp = _simd16_mul_ps(comp, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(0)));
-
- simd16scalari temp = _simd16_cvtps_epi32(comp);
-
- // swizzle: reorder the 32-bit lanes to match the order of the destination pointers
- temp = _simd16_permute_epi32(temp, _simd16_set_epi32(15, 14, 11, 10, 13, 12, 9, 8, 7, 6, 3, 2, 5, 4, 1, 0));
-
- // merge/store data into destination but don't overwrite the X8 bits
- simdscalari destlo = _simd_loadu2_si(reinterpret_cast<simd4scalari *>(ppDsts[1]), reinterpret_cast<simd4scalari *>(ppDsts[0]));
- simdscalari desthi = _simd_loadu2_si(reinterpret_cast<simd4scalari *>(ppDsts[3]), reinterpret_cast<simd4scalari *>(ppDsts[2]));
-
- simd16scalari dest = _simd16_setzero_si();
-
- dest = _simd16_insert_si(dest, destlo, 0);
- dest = _simd16_insert_si(dest, desthi, 1);
-
- simd16scalari mask = _simd16_set1_epi32(0x00FFFFFF); // selects the low 24 bits of every 32-bit lane
-
- dest = _simd16_or_si(_simd16_andnot_si(mask, dest), _simd16_and_si(mask, temp)); // NOTE(review): presumably andnot is (~mask & dest), keeping the old top byte - confirm
-
- _simd_storeu2_si(reinterpret_cast<simd4scalari *>(ppDsts[1]), reinterpret_cast<simd4scalari *>(ppDsts[0]), _simd16_extract_si(dest, 0));
- _simd_storeu2_si(reinterpret_cast<simd4scalari *>(ppDsts[3]), reinterpret_cast<simd4scalari *>(ppDsts[2]), _simd16_extract_si(dest, 1));
- }
-};
-
-template<SWR_FORMAT DstFormat>
-INLINE static void FlatConvert(const uint8_t* pSrc, uint8_t* pDst0, uint8_t* pDst1, uint8_t* pDst2, uint8_t* pDst3)
-{
- // Packs a SIMD16 of four float32 SOA components into one 32-bit AOS pixel per lane and writes four 128-bit pieces to pDst0..pDst3; components are swizzled (rgba -> bgra) while we load
- simd16scalar comp0 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc + FormatTraits<DstFormat>::swizzle(0) * sizeof(simd16scalar))); // float32 rrrrrrrrrrrrrrrr
- simd16scalar comp1 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc + FormatTraits<DstFormat>::swizzle(1) * sizeof(simd16scalar))); // float32 gggggggggggggggg
- simd16scalar comp2 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc + FormatTraits<DstFormat>::swizzle(2) * sizeof(simd16scalar))); // float32 bbbbbbbbbbbbbbbb
- simd16scalar comp3 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc + FormatTraits<DstFormat>::swizzle(3) * sizeof(simd16scalar))); // float32 aaaaaaaaaaaaaaaa
-
- // clamp every component to [0.0f, 1.0f]
- const simd16scalar zero = _simd16_setzero_ps();
- const simd16scalar ones = _simd16_set1_ps(1.0f);
-
- comp0 = _simd16_max_ps(comp0, zero);
- comp0 = _simd16_min_ps(comp0, ones);
-
- comp1 = _simd16_max_ps(comp1, zero);
- comp1 = _simd16_min_ps(comp1, ones);
-
- comp2 = _simd16_max_ps(comp2, zero);
- comp2 = _simd16_min_ps(comp2, ones);
-
- comp3 = _simd16_max_ps(comp3, zero);
- comp3 = _simd16_min_ps(comp3, ones);
-
- // gamma-correct only rgb (component 3 is left linear)
- if (FormatTraits<DstFormat>::isSRGB)
- {
- comp0 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(0, comp0);
- comp1 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(1, comp1);
- comp2 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(2, comp2);
- }
-
- // convert float components from 0.0f..1.0f to correct scale for 0..255 dest format
- comp0 = _simd16_mul_ps(comp0, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(0)));
- comp1 = _simd16_mul_ps(comp1, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(1)));
- comp2 = _simd16_mul_ps(comp2, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(2)));
- comp3 = _simd16_mul_ps(comp3, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(3)));
-
- // moving to 16 wide integer vector types
- simd16scalari src0 = _simd16_cvtps_epi32(comp0); // padded byte rrrrrrrrrrrrrrrr
- simd16scalari src1 = _simd16_cvtps_epi32(comp1); // padded byte gggggggggggggggg
- simd16scalari src2 = _simd16_cvtps_epi32(comp2); // padded byte bbbbbbbbbbbbbbbb
- simd16scalari src3 = _simd16_cvtps_epi32(comp3); // padded byte aaaaaaaaaaaaaaaa
-
- // SOA to AOS conversion: move components 1..3 to byte positions 1..3 of each 32-bit lane
- src1 = _simd16_slli_epi32(src1, 8);
- src2 = _simd16_slli_epi32(src2, 16);
- src3 = _simd16_slli_epi32(src3, 24);
-
- simd16scalari final = _simd16_or_si(_simd16_or_si(src0, src1), _simd16_or_si(src2, src3)); // 0 1 2 3 4 5 6 7 8 9 A B C D E F
-
- // de-swizzle conversion (the #else branch is an alternative single-permute form that is compiled out)
-#if 1
- simd16scalari final0 = _simd16_permute2f128_si(final, final, 0xA0); // (2, 2, 0, 0) // 0 1 2 3 0 1 2 3 8 9 A B 8 9 A B
- simd16scalari final1 = _simd16_permute2f128_si(final, final, 0xF5); // (3, 3, 1, 1) // 4 5 6 7 4 5 6 7 C D E F C D E F
-
- final = _simd16_shuffle_epi64(final0, final1, 0xCC); // (1 1 0 0 1 1 0 0) // 0 1 4 5 2 3 6 7 8 9 C D A B E F
-
-#else
- final = _simd16_permute_epi32(final, _simd16_set_epi32(15, 14, 11, 10, 13, 12, 9, 8, 7, 6, 3, 2, 5, 4, 1, 0));
-
-#endif
- // store 8x2 memory order:
- // row0: [ pDst0, pDst2 ] = { 0 1 4 5 }, { 8 9 C D }
- // row1: [ pDst1, pDst3 ] = { 2 3 6 7 }, { A B E F }
- _simd_storeu2_si(reinterpret_cast<simd4scalari *>(pDst1), reinterpret_cast<simd4scalari *>(pDst0), _simd16_extract_si(final, 0));
- _simd_storeu2_si(reinterpret_cast<simd4scalari *>(pDst3), reinterpret_cast<simd4scalari *>(pDst2), _simd16_extract_si(final, 1));
-}
-
-template<SWR_FORMAT DstFormat>
-INLINE static void FlatConvert(const uint8_t* pSrc, uint8_t* pDst, uint8_t* pDst1)
-{
- static const uint32_t offset = sizeof(simdscalar); // byte stride between consecutive component planes in pSrc
-
- // 8-wide variant: packs four float32 SOA components into one 32-bit pixel per lane; components are swizzled (rgba -> bgra) while we load
- simdscalar vComp0 = _simd_load_ps((const float*)(pSrc + (FormatTraits<DstFormat>::swizzle(0))*offset)); // float32 rrrrrrrr
- simdscalar vComp1 = _simd_load_ps((const float*)(pSrc + (FormatTraits<DstFormat>::swizzle(1))*offset)); // float32 gggggggg
- simdscalar vComp2 = _simd_load_ps((const float*)(pSrc + (FormatTraits<DstFormat>::swizzle(2))*offset)); // float32 bbbbbbbb
- simdscalar vComp3 = _simd_load_ps((const float*)(pSrc + (FormatTraits<DstFormat>::swizzle(3))*offset)); // float32 aaaaaaaa
-
- // clamp every component to [0.0f, 1.0f]
- vComp0 = _simd_max_ps(vComp0, _simd_setzero_ps());
- vComp0 = _simd_min_ps(vComp0, _simd_set1_ps(1.0f));
-
- vComp1 = _simd_max_ps(vComp1, _simd_setzero_ps());
- vComp1 = _simd_min_ps(vComp1, _simd_set1_ps(1.0f));
-
- vComp2 = _simd_max_ps(vComp2, _simd_setzero_ps());
- vComp2 = _simd_min_ps(vComp2, _simd_set1_ps(1.0f));
-
- vComp3 = _simd_max_ps(vComp3, _simd_setzero_ps());
- vComp3 = _simd_min_ps(vComp3, _simd_set1_ps(1.0f));
-
- if (FormatTraits<DstFormat>::isSRGB)
- {
- // Gamma-correct only rgb (component 3 is left linear)
- vComp0 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(0, vComp0);
- vComp1 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(1, vComp1);
- vComp2 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(2, vComp2);
- }
-
- // convert float components from 0.0f .. 1.0f to correct scale for 0 .. 255 dest format
- vComp0 = _simd_mul_ps(vComp0, _simd_set1_ps(FormatTraits<DstFormat>::fromFloat(0)));
- vComp1 = _simd_mul_ps(vComp1, _simd_set1_ps(FormatTraits<DstFormat>::fromFloat(1)));
- vComp2 = _simd_mul_ps(vComp2, _simd_set1_ps(FormatTraits<DstFormat>::fromFloat(2)));
- vComp3 = _simd_mul_ps(vComp3, _simd_set1_ps(FormatTraits<DstFormat>::fromFloat(3)));
-
- // moving to 8 wide integer vector types
- simdscalari src0 = _simd_cvtps_epi32(vComp0); // padded byte rrrrrrrr
- simdscalari src1 = _simd_cvtps_epi32(vComp1); // padded byte gggggggg
- simdscalari src2 = _simd_cvtps_epi32(vComp2); // padded byte bbbbbbbb
- simdscalari src3 = _simd_cvtps_epi32(vComp3); // padded byte aaaaaaaa
-
-#if KNOB_ARCH <= KNOB_ARCH_AVX
-
- // splitting into two sets of 4 wide integer vector types
- // because AVX doesn't have instructions to support this operation at 8 wide
- simd4scalari srcLo0 = _mm256_castsi256_si128(src0); // 000r000r000r000r
- simd4scalari srcLo1 = _mm256_castsi256_si128(src1); // 000g000g000g000g
- simd4scalari srcLo2 = _mm256_castsi256_si128(src2); // 000b000b000b000b
- simd4scalari srcLo3 = _mm256_castsi256_si128(src3); // 000a000a000a000a
-
- simd4scalari srcHi0 = _mm256_extractf128_si256(src0, 1); // 000r000r000r000r
- simd4scalari srcHi1 = _mm256_extractf128_si256(src1, 1); // 000g000g000g000g
- simd4scalari srcHi2 = _mm256_extractf128_si256(src2, 1); // 000b000b000b000b
- simd4scalari srcHi3 = _mm256_extractf128_si256(src3, 1); // 000a000a000a000a
-
- srcLo1 = _mm_slli_si128(srcLo1, 1); // 00g000g000g000g0
- srcHi1 = _mm_slli_si128(srcHi1, 1); // 00g000g000g000g0
- srcLo2 = _mm_slli_si128(srcLo2, 2); // 0b000b000b000b00
- srcHi2 = _mm_slli_si128(srcHi2, 2); // 0b000b000b000b00
- srcLo3 = _mm_slli_si128(srcLo3, 3); // a000a000a000a000
- srcHi3 = _mm_slli_si128(srcHi3, 3); // a000a000a000a000
-
- srcLo0 = SIMD128::or_si(srcLo0, srcLo1); // 00gr00gr00gr00gr
- srcLo2 = SIMD128::or_si(srcLo2, srcLo3); // ab00ab00ab00ab00
-
- srcHi0 = SIMD128::or_si(srcHi0, srcHi1); // 00gr00gr00gr00gr
- srcHi2 = SIMD128::or_si(srcHi2, srcHi3); // ab00ab00ab00ab00
-
- srcLo0 = SIMD128::or_si(srcLo0, srcLo2); // abgrabgrabgrabgr
- srcHi0 = SIMD128::or_si(srcHi0, srcHi2); // abgrabgrabgrabgr
-
- // unpack into rows that get the tiling order correct
- simd4scalari vRow00 = SIMD128::unpacklo_epi64(srcLo0, srcHi0); // abgrabgrabgrabgrabgrabgrabgrabgr
- simd4scalari vRow10 = SIMD128::unpackhi_epi64(srcLo0, srcHi0);
-
- simdscalari final = _mm256_castsi128_si256(vRow00);
- final = _mm256_insertf128_si256(final, vRow10, 1);
-
-#else
-
- // logic is as above, only wider (byte shifts applied to the full 256-bit registers)
- src1 = _mm256_slli_si256(src1, 1);
- src2 = _mm256_slli_si256(src2, 2);
- src3 = _mm256_slli_si256(src3, 3);
-
- src0 = _mm256_or_si256(src0, src1);
- src2 = _mm256_or_si256(src2, src3);
-
- simdscalari final = _mm256_or_si256(src0, src2);
-
- // adjust the data to get the tiling order correct 0 1 2 3 -> 0 2 1 3
- final = _mm256_permute4x64_epi64(final, 0xD8);
-#endif
-
- _simd_storeu2_si((simd4scalari*)pDst1, (simd4scalari*)pDst, final); // NOTE(review): presumably the low 128 bits go to pDst and the high 128 bits to pDst1 - confirm
-}
-
-template<SWR_FORMAT DstFormat>
-INLINE static void FlatConvertNoAlpha(const uint8_t* pSrc, uint8_t* pDst0, uint8_t* pDst1, uint8_t* pDst2, uint8_t* pDst3)
-{
- // Same packing as the 16-wide FlatConvert but only components 0..2 are loaded, so byte 3 of each packed pixel receives no data; components are swizzled (rgba -> bgra) while we load
- simd16scalar comp0 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc + FormatTraits<DstFormat>::swizzle(0) * sizeof(simd16scalar))); // float32 rrrrrrrrrrrrrrrr
- simd16scalar comp1 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc + FormatTraits<DstFormat>::swizzle(1) * sizeof(simd16scalar))); // float32 gggggggggggggggg
- simd16scalar comp2 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc + FormatTraits<DstFormat>::swizzle(2) * sizeof(simd16scalar))); // float32 bbbbbbbbbbbbbbbb
-
- // clamp every component to [0.0f, 1.0f]
- const simd16scalar zero = _simd16_setzero_ps();
- const simd16scalar ones = _simd16_set1_ps(1.0f);
-
- comp0 = _simd16_max_ps(comp0, zero);
- comp0 = _simd16_min_ps(comp0, ones);
-
- comp1 = _simd16_max_ps(comp1, zero);
- comp1 = _simd16_min_ps(comp1, ones);
-
- comp2 = _simd16_max_ps(comp2, zero);
- comp2 = _simd16_min_ps(comp2, ones);
-
- // gamma-correct only rgb
- if (FormatTraits<DstFormat>::isSRGB)
- {
- comp0 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(0, comp0);
- comp1 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(1, comp1);
- comp2 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(2, comp2);
- }
-
- // convert float components from 0.0f..1.0f to correct scale for 0..255 dest format
- comp0 = _simd16_mul_ps(comp0, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(0)));
- comp1 = _simd16_mul_ps(comp1, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(1)));
- comp2 = _simd16_mul_ps(comp2, _simd16_set1_ps(FormatTraits<DstFormat>::fromFloat(2)));
-
- // moving to 16 wide integer vector types
- simd16scalari src0 = _simd16_cvtps_epi32(comp0); // padded byte rrrrrrrrrrrrrrrr
- simd16scalari src1 = _simd16_cvtps_epi32(comp1); // padded byte gggggggggggggggg
- simd16scalari src2 = _simd16_cvtps_epi32(comp2); // padded byte bbbbbbbbbbbbbbbb
-
- // SOA to AOS conversion: move components 1 and 2 to byte positions 1 and 2 of each 32-bit lane
- src1 = _simd16_slli_epi32(src1, 8);
- src2 = _simd16_slli_epi32(src2, 16);
-
- simd16scalari final = _simd16_or_si(_simd16_or_si(src0, src1), src2); // 0 1 2 3 4 5 6 7 8 9 A B C D E F
-
- // de-swizzle conversion (the #else branch is an alternative single-permute form that is compiled out)
-#if 1
- simd16scalari final0 = _simd16_permute2f128_si(final, final, 0xA0); // (2, 2, 0, 0) // 0 1 2 3 0 1 2 3 8 9 A B 8 9 A B
- simd16scalari final1 = _simd16_permute2f128_si(final, final, 0xF5); // (3, 3, 1, 1) // 4 5 6 7 4 5 6 7 C D E F C D E F
-
- final = _simd16_shuffle_epi64(final0, final1, 0xCC); // (1 1 0 0 1 1 0 0) // 0 1 4 5 2 3 6 7 8 9 C D A B E F
-
-#else
- final = _simd16_permute_epi32(final, _simd16_set_epi32(15, 14, 11, 10, 13, 12, 9, 8, 7, 6, 3, 2, 5, 4, 1, 0));
-
-#endif
- // store 8x2 memory order:
- // row0: [ pDst0, pDst2 ] = { 0 1 4 5 }, { 8 9 C D }
- // row1: [ pDst1, pDst3 ] = { 2 3 6 7 }, { A B E F }
- _simd_storeu2_si(reinterpret_cast<simd4scalari *>(pDst1), reinterpret_cast<simd4scalari *>(pDst0), _simd16_extract_si(final, 0));
- _simd_storeu2_si(reinterpret_cast<simd4scalari *>(pDst3), reinterpret_cast<simd4scalari *>(pDst2), _simd16_extract_si(final, 1));
-}
-
-template<SWR_FORMAT DstFormat>
-INLINE static void FlatConvertNoAlpha(const uint8_t* pSrc, uint8_t* pDst, uint8_t* pDst1)
-{
- static const uint32_t offset = sizeof(simdscalar); // byte stride between consecutive component planes in pSrc
-
- // 8-wide variant without component 3: only components 0..2 are loaded; they are swizzled (rgba -> bgra) while we load
- simdscalar vComp0 = _simd_load_ps((const float*)(pSrc + (FormatTraits<DstFormat>::swizzle(0))*offset)); // float32 rrrrrrrr
- simdscalar vComp1 = _simd_load_ps((const float*)(pSrc + (FormatTraits<DstFormat>::swizzle(1))*offset)); // float32 gggggggg
- simdscalar vComp2 = _simd_load_ps((const float*)(pSrc + (FormatTraits<DstFormat>::swizzle(2))*offset)); // float32 bbbbbbbb
- // clamp every component to [0.0f, 1.0f]
- vComp0 = _simd_max_ps(vComp0, _simd_setzero_ps());
- vComp0 = _simd_min_ps(vComp0, _simd_set1_ps(1.0f));
-
- vComp1 = _simd_max_ps(vComp1, _simd_setzero_ps());
- vComp1 = _simd_min_ps(vComp1, _simd_set1_ps(1.0f));
-
- vComp2 = _simd_max_ps(vComp2, _simd_setzero_ps());
- vComp2 = _simd_min_ps(vComp2, _simd_set1_ps(1.0f));
-
- if (FormatTraits<DstFormat>::isSRGB)
- {
- // Gamma-correct only rgb
- vComp0 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(0, vComp0);
- vComp1 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(1, vComp1);
- vComp2 = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(2, vComp2);
- }
-
- // convert float components from 0.0f .. 1.0f to correct scale for 0 .. 255 dest format
- vComp0 = _simd_mul_ps(vComp0, _simd_set1_ps(FormatTraits<DstFormat>::fromFloat(0)));
- vComp1 = _simd_mul_ps(vComp1, _simd_set1_ps(FormatTraits<DstFormat>::fromFloat(1)));
- vComp2 = _simd_mul_ps(vComp2, _simd_set1_ps(FormatTraits<DstFormat>::fromFloat(2)));
-
- // moving to 8 wide integer vector types
- simdscalari src0 = _simd_cvtps_epi32(vComp0); // padded byte rrrrrrrr
- simdscalari src1 = _simd_cvtps_epi32(vComp1); // padded byte gggggggg
- simdscalari src2 = _simd_cvtps_epi32(vComp2); // padded byte bbbbbbbb
-
-#if KNOB_ARCH <= KNOB_ARCH_AVX
-
- // splitting into two sets of 4 wide integer vector types
- // because AVX doesn't have instructions to support this operation at 8 wide
- simd4scalari srcLo0 = _mm256_castsi256_si128(src0); // 000r000r000r000r
- simd4scalari srcLo1 = _mm256_castsi256_si128(src1); // 000g000g000g000g
- simd4scalari srcLo2 = _mm256_castsi256_si128(src2); // 000b000b000b000b
-
- simd4scalari srcHi0 = _mm256_extractf128_si256(src0, 1); // 000r000r000r000r
- simd4scalari srcHi1 = _mm256_extractf128_si256(src1, 1); // 000g000g000g000g
- simd4scalari srcHi2 = _mm256_extractf128_si256(src2, 1); // 000b000b000b000b
-
- srcLo1 = _mm_slli_si128(srcLo1, 1); // 00g000g000g000g0
- srcHi1 = _mm_slli_si128(srcHi1, 1); // 00g000g000g000g0
- srcLo2 = _mm_slli_si128(srcLo2, 2); // 0b000b000b000b00
- srcHi2 = _mm_slli_si128(srcHi2, 2); // 0b000b000b000b00
-
- srcLo0 = SIMD128::or_si(srcLo0, srcLo1); // 00gr00gr00gr00gr
-
- srcHi0 = SIMD128::or_si(srcHi0, srcHi1); // 00gr00gr00gr00gr
-
- srcLo0 = SIMD128::or_si(srcLo0, srcLo2); // 0bgr0bgr0bgr0bgr
- srcHi0 = SIMD128::or_si(srcHi0, srcHi2); // 0bgr0bgr0bgr0bgr
-
- // unpack into rows that get the tiling order correct
- simd4scalari vRow00 = SIMD128::unpacklo_epi64(srcLo0, srcHi0); // 0bgr0bgr0bgr0bgr0bgr0bgr0bgr0bgr
- simd4scalari vRow10 = SIMD128::unpackhi_epi64(srcLo0, srcHi0);
-
- simdscalari final = _mm256_castsi128_si256(vRow00);
- final = _mm256_insertf128_si256(final, vRow10, 1);
-
-#else
-
- // logic is as above, only wider (byte shifts applied to the full 256-bit registers)
- src1 = _mm256_slli_si256(src1, 1);
- src2 = _mm256_slli_si256(src2, 2);
-
- src0 = _mm256_or_si256(src0, src1);
-
- simdscalari final = _mm256_or_si256(src0, src2);
-
- // adjust the data to get the tiling order correct 0 1 2 3 -> 0 2 1 3
- final = _mm256_permute4x64_epi64(final, 0xD8);
-
-#endif
-
- _simd_storeu2_si((simd4scalari*)pDst1, (simd4scalari*)pDst, final); // NOTE(review): presumably the low 128 bits go to pDst and the high 128 bits to pDst1 - confirm
-}
-
-/// R32G32B32A32_FLOAT -> B8G8R8A8_UNORM: forwards to FlatConvert.
-template <>
-struct ConvertPixelsSOAtoAOS<R32G32B32A32_FLOAT, B8G8R8A8_UNORM>
-{
-    /// @param pSrc   - Source pixel data handed to FlatConvert unchanged.
-    /// @param ppDsts - Destination pointer array; entries 0..3 are forwarded in order.
-    template <size_t NumDests>
-    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
-    {
-        uint8_t* dst0 = ppDsts[0];
-        uint8_t* dst1 = ppDsts[1];
-        uint8_t* dst2 = ppDsts[2];
-        uint8_t* dst3 = ppDsts[3];
-
-        FlatConvert<B8G8R8A8_UNORM>(pSrc, dst0, dst1, dst2, dst3);
-    }
-};
-
-/// R32G32B32A32_FLOAT -> B8G8R8X8_UNORM: forwards to FlatConvertNoAlpha.
-template <>
-struct ConvertPixelsSOAtoAOS<R32G32B32A32_FLOAT, B8G8R8X8_UNORM>
-{
-    /// @param pSrc   - Source pixel data handed to FlatConvertNoAlpha unchanged.
-    /// @param ppDsts - Destination pointer array; entries 0..3 are forwarded in order.
-    template <size_t NumDests>
-    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
-    {
-        uint8_t* dst0 = ppDsts[0];
-        uint8_t* dst1 = ppDsts[1];
-        uint8_t* dst2 = ppDsts[2];
-        uint8_t* dst3 = ppDsts[3];
-
-        FlatConvertNoAlpha<B8G8R8X8_UNORM>(pSrc, dst0, dst1, dst2, dst3);
-    }
-};
-
-/// R32G32B32A32_FLOAT -> B8G8R8A8_UNORM_SRGB: forwards to FlatConvert.
-template <>
-struct ConvertPixelsSOAtoAOS<R32G32B32A32_FLOAT, B8G8R8A8_UNORM_SRGB>
-{
-    /// @param pSrc   - Source pixel data handed to FlatConvert unchanged.
-    /// @param ppDsts - Destination pointer array; entries 0..3 are forwarded in order.
-    template <size_t NumDests>
-    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
-    {
-        uint8_t* dst0 = ppDsts[0];
-        uint8_t* dst1 = ppDsts[1];
-        uint8_t* dst2 = ppDsts[2];
-        uint8_t* dst3 = ppDsts[3];
-
-        FlatConvert<B8G8R8A8_UNORM_SRGB>(pSrc, dst0, dst1, dst2, dst3);
-    }
-};
-
-/// R32G32B32A32_FLOAT -> B8G8R8X8_UNORM_SRGB: forwards to FlatConvertNoAlpha.
-template <>
-struct ConvertPixelsSOAtoAOS<R32G32B32A32_FLOAT, B8G8R8X8_UNORM_SRGB>
-{
-    /// @param pSrc   - Source pixel data handed to FlatConvertNoAlpha unchanged.
-    /// @param ppDsts - Destination pointer array; entries 0..3 are forwarded in order.
-    template <size_t NumDests>
-    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
-    {
-        uint8_t* dst0 = ppDsts[0];
-        uint8_t* dst1 = ppDsts[1];
-        uint8_t* dst2 = ppDsts[2];
-        uint8_t* dst3 = ppDsts[3];
-
-        FlatConvertNoAlpha<B8G8R8X8_UNORM_SRGB>(pSrc, dst0, dst1, dst2, dst3);
-    }
-};
-
-/// R32G32B32A32_FLOAT -> R8G8B8A8_UNORM: forwards to FlatConvert.
-template <>
-struct ConvertPixelsSOAtoAOS<R32G32B32A32_FLOAT, R8G8B8A8_UNORM>
-{
-    /// @param pSrc   - Source pixel data handed to FlatConvert unchanged.
-    /// @param ppDsts - Destination pointer array; entries 0..3 are forwarded in order.
-    template <size_t NumDests>
-    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
-    {
-        uint8_t* dst0 = ppDsts[0];
-        uint8_t* dst1 = ppDsts[1];
-        uint8_t* dst2 = ppDsts[2];
-        uint8_t* dst3 = ppDsts[3];
-
-        FlatConvert<R8G8B8A8_UNORM>(pSrc, dst0, dst1, dst2, dst3);
-    }
-};
-
-/// R32G32B32A32_FLOAT -> R8G8B8X8_UNORM: forwards to FlatConvertNoAlpha.
-template <>
-struct ConvertPixelsSOAtoAOS<R32G32B32A32_FLOAT, R8G8B8X8_UNORM>
-{
-    /// @param pSrc   - Source pixel data handed to FlatConvertNoAlpha unchanged.
-    /// @param ppDsts - Destination pointer array; entries 0..3 are forwarded in order.
-    template <size_t NumDests>
-    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
-    {
-        uint8_t* dst0 = ppDsts[0];
-        uint8_t* dst1 = ppDsts[1];
-        uint8_t* dst2 = ppDsts[2];
-        uint8_t* dst3 = ppDsts[3];
-
-        FlatConvertNoAlpha<R8G8B8X8_UNORM>(pSrc, dst0, dst1, dst2, dst3);
-    }
-};
-
-/// R32G32B32A32_FLOAT -> R8G8B8A8_UNORM_SRGB: forwards to FlatConvert.
-template <>
-struct ConvertPixelsSOAtoAOS<R32G32B32A32_FLOAT, R8G8B8A8_UNORM_SRGB>
-{
-    /// @param pSrc   - Source pixel data handed to FlatConvert unchanged.
-    /// @param ppDsts - Destination pointer array; entries 0..3 are forwarded in order.
-    template <size_t NumDests>
-    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
-    {
-        uint8_t* dst0 = ppDsts[0];
-        uint8_t* dst1 = ppDsts[1];
-        uint8_t* dst2 = ppDsts[2];
-        uint8_t* dst3 = ppDsts[3];
-
-        FlatConvert<R8G8B8A8_UNORM_SRGB>(pSrc, dst0, dst1, dst2, dst3);
-    }
-};
-
-/// R32G32B32A32_FLOAT -> R8G8B8X8_UNORM_SRGB: forwards to FlatConvertNoAlpha.
-template <>
-struct ConvertPixelsSOAtoAOS<R32G32B32A32_FLOAT, R8G8B8X8_UNORM_SRGB>
-{
-    /// @param pSrc   - Source pixel data handed to FlatConvertNoAlpha unchanged.
-    /// @param ppDsts - Destination pointer array; entries 0..3 are forwarded in order.
-    template <size_t NumDests>
-    INLINE static void Convert(const uint8_t* pSrc, uint8_t* (&ppDsts)[NumDests])
-    {
-        uint8_t* dst0 = ppDsts[0];
-        uint8_t* dst1 = ppDsts[1];
-        uint8_t* dst2 = ppDsts[2];
-        uint8_t* dst3 = ppDsts[3];
-
-        FlatConvertNoAlpha<R8G8B8X8_UNORM_SRGB>(pSrc, dst0, dst1, dst2, dst3);
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// StoreRasterTile - generic, bounds-checked, pixel-at-a-time store/resolve
-//////////////////////////////////////////////////////////////////////////
-template <typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct StoreRasterTile
-{
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Reads one swizzled color from the source raster tile.
-    /// @param pSrc - Pointer to raster tile.
-    /// @param x, y - Pixel coordinates within the raster tile.
-    /// @param outputColor - Receives the color fetched via GetSwizzledColor.
-    INLINE static void GetSwizzledSrcColor(
-        uint8_t* pSrc,
-        uint32_t x, uint32_t y,
-        float outputColor[4])
-    {
-        typedef SimdTile_16<SrcFormat, DstFormat> SimdTileT;
-
-        // Linear index of the simd tile that holds pixel (x, y).
-        const uint32_t tileRow   = y / SIMD16_TILE_Y_DIM;
-        const uint32_t tileCol   = x / SIMD16_TILE_X_DIM;
-        const uint32_t tileIndex = tileRow * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM) + tileCol;
-
-        // Position of the pixel inside that simd tile.
-        const uint32_t pixelInTile = (y % SIMD16_TILE_Y_DIM) * SIMD16_TILE_X_DIM + (x % SIMD16_TILE_X_DIM);
-
-        SimdTileT* pTile = reinterpret_cast<SimdTileT*>(pSrc) + tileIndex;
-        pTile->GetSwizzledColor(pixelInTile, outputColor);
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Stores a raster tile to the destination surface one pixel at a
-    ///        time, skipping pixels outside the surface's current LOD extent.
-    /// @param pSrc - Pointer to raster tile.
-    /// @param pDstSurface - Destination surface state
-    /// @param x, y - Pixel coordinates of the start of the raster tile.
-    /// @param sampleNum - Sample index passed to ComputeSurfaceAddress.
-    /// @param renderTargetArrayIndex - Added to the surface's arrayIndex.
-    INLINE static void Store(
-        uint8_t* pSrc,
-        SWR_SURFACE_STATE* pDstSurface,
-        uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)
-    {
-        const uint32_t lodWidth  = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
-        const uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
-
-        for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)
-        {
-            // Rows below the surface have nothing to write.
-            if ((y + ry) >= lodHeight)
-            {
-                continue;
-            }
-
-            for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)
-            {
-                // Columns right of the surface have nothing to write.
-                if ((x + rx) >= lodWidth)
-                {
-                    continue;
-                }
-
-                float srcColor[4];
-                GetSwizzledSrcColor(pSrc, rx, ry, srcColor);
-
-                const auto arraySlice = pDstSurface->arrayIndex + renderTargetArrayIndex;
-                uint8_t* pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(
-                    (x + rx), (y + ry), arraySlice, arraySlice,
-                    sampleNum, pDstSurface->lod, pDstSurface);
-
-                ConvertPixelFromFloat<DstFormat>(pDst, srcColor);
-            }
-        }
-    }
-
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Averages all samples of each pixel of a raster tile and writes
-    ///        the result to the resolve surface referenced by
-    ///        pDstSurface->xpAuxBaseAddress.
-    /// @param pSrc - Pointer to raster tile.
-    /// @param pDstSurface - Destination surface state
-    /// @param x, y - Pixel coordinates of the start of the raster tile.
-    /// @param sampleOffset - Offset between adjacent multisamples
-    /// @param renderTargetArrayIndex - Added to the resolve surface's arrayIndex.
-    INLINE static void Resolve(
-        uint8_t* pSrc,
-        SWR_SURFACE_STATE* pDstSurface,
-        uint32_t x, uint32_t y, uint32_t sampleOffset, uint32_t renderTargetArrayIndex)
-    {
-        const uint32_t lodWidth  = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
-        const uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
-
-        const float sampleWeight = 1.0f / pDstSurface->numSamples;
-
-        for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)
-        {
-            if ((y + ry) >= lodHeight)
-            {
-                continue;
-            }
-
-            for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)
-            {
-                if ((x + rx) >= lodWidth)
-                {
-                    continue;
-                }
-
-                // Accumulate every sample of this pixel.
-                float resolveColor[4] = {0};
-                for (uint32_t sampleNum = 0; sampleNum < pDstSurface->numSamples; sampleNum++)
-                {
-                    float sampleColor[4] = {0};
-                    GetSwizzledSrcColor(pSrc + sampleOffset * sampleNum, rx, ry, sampleColor);
-
-                    for (uint32_t comp = 0; comp < 4; ++comp)
-                    {
-                        resolveColor[comp] += sampleColor[comp];
-                    }
-                }
-
-                // Scale the sums by 1/numSamples to get the average.
-                for (uint32_t comp = 0; comp < 4; ++comp)
-                {
-                    resolveColor[comp] *= sampleWeight;
-                }
-
-                // The write goes to the resolve surface, always at sample 0.
-                SWR_SURFACE_STATE* pResolveSurface = (SWR_SURFACE_STATE*)pDstSurface->xpAuxBaseAddress;
-                const auto arraySlice = pResolveSurface->arrayIndex + renderTargetArrayIndex;
-                uint8_t* pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(
-                    (x + rx), (y + ry), arraySlice, arraySlice,
-                    0, pResolveSurface->lod, pResolveSurface);
-
-                ConvertPixelFromFloat<DstFormat>(pDst, resolveColor);
-            }
-        }
-    }
-};
-
-/// Primary template: any tiling-mode / bpp combination without a dedicated
-/// specialization inherits StoreRasterTile unchanged.
-template <typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct OptStoreRasterTile : StoreRasterTile<TTraits, SrcFormat, DstFormat>
-{
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// OptStoreRasterTile - SWR_TILE_NONE specialization for 8bpp
-//////////////////////////////////////////////////////////////////////////
-template <SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct OptStoreRasterTile<TilingTraits<SWR_TILE_NONE, 8>, SrcFormat, DstFormat>
-{
-    typedef StoreRasterTile<TilingTraits<SWR_TILE_NONE, 8>, SrcFormat, DstFormat> GenericStoreTile;
-    static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;
-    static const size_t DST_BYTES_PER_PIXEL = FormatTraits<DstFormat>::bpp / 8;
-
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Stores a raster tile to a pitch-addressed destination surface,
-    ///        converting one simd16 tile per ConvertPixelsSOAtoAOS call.
-    /// @param pSrc - Pointer to raster tile.
-    /// @param pDstSurface - Destination surface state
-    /// @param x, y - Pixel coordinates of the start of the raster tile.
-    /// @param sampleNum - Sample index passed to ComputeSurfaceAddress.
-    /// @param renderTargetArrayIndex - Added to the surface's arrayIndex.
-    INLINE static void Store(
-        uint8_t* pSrc,
-        SWR_SURFACE_STATE* pDstSurface,
-        uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)
-    {
-        const uint32_t lodWidth  = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
-        const uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
-
-        // Tiles crossing the right/bottom edge use the bounds-checked generic store.
-        const bool fullTile = (x + KNOB_TILE_X_DIM <= lodWidth) && (y + KNOB_TILE_Y_DIM <= lodHeight);
-        if (!fullTile)
-        {
-            GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);
-            return;
-        }
-
-        const auto arraySlice = pDstSurface->arrayIndex + renderTargetArrayIndex;
-        uint8_t* pTileDst = (uint8_t*)ComputeSurfaceAddress<false, false>(
-            x, y, arraySlice, arraySlice, sampleNum, pDstSurface->lod, pDstSurface);
-
-        // dx: bytes between horizontally adjacent simd16 tiles.
-        // dy: bytes to the next band of simd16 tiles, less the dx steps already taken.
-        const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL;
-        const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch - KNOB_TILE_X_DIM * DST_BYTES_PER_PIXEL;
-
-        uint8_t* ppDsts[] =
-        {
-            pTileDst,                                  // row 0, col 0
-            pTileDst + pDstSurface->pitch,             // row 1, col 0
-            pTileDst + dx / 2,                         // row 0, col 1
-            pTileDst + pDstSurface->pitch + dx / 2     // row 1, col 1
-        };
-
-        for (uint32_t tileY = 0; tileY < KNOB_TILE_Y_DIM; tileY += SIMD16_TILE_Y_DIM)
-        {
-            for (uint32_t tileX = 0; tileX < KNOB_TILE_X_DIM; tileX += SIMD16_TILE_X_DIM)
-            {
-                ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);
-                pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;
-
-                for (uint8_t*& pCursor : ppDsts)
-                {
-                    pCursor += dx;
-                }
-            }
-
-            for (uint8_t*& pCursor : ppDsts)
-            {
-                pCursor += dy;
-            }
-        }
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// OptStoreRasterTile - SWR_TILE_NONE specialization for 16bpp
-//////////////////////////////////////////////////////////////////////////
-template <SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct OptStoreRasterTile<TilingTraits<SWR_TILE_NONE, 16>, SrcFormat, DstFormat>
-{
-    typedef StoreRasterTile<TilingTraits<SWR_TILE_NONE, 16>, SrcFormat, DstFormat> GenericStoreTile;
-    static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;
-    static const size_t DST_BYTES_PER_PIXEL = FormatTraits<DstFormat>::bpp / 8;
-
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Stores a raster tile to a pitch-addressed destination surface,
-    ///        converting one simd16 tile per ConvertPixelsSOAtoAOS call.
-    /// @param pSrc - Pointer to raster tile.
-    /// @param pDstSurface - Destination surface state
-    /// @param x, y - Pixel coordinates of the start of the raster tile.
-    /// @param sampleNum - Sample index passed to ComputeSurfaceAddress.
-    /// @param renderTargetArrayIndex - Added to the surface's arrayIndex.
-    INLINE static void Store(
-        uint8_t* pSrc,
-        SWR_SURFACE_STATE* pDstSurface,
-        uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)
-    {
-        const uint32_t lodWidth  = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
-        const uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
-
-        // Tiles crossing the right/bottom edge use the bounds-checked generic store.
-        const bool fullTile = (x + KNOB_TILE_X_DIM <= lodWidth) && (y + KNOB_TILE_Y_DIM <= lodHeight);
-        if (!fullTile)
-        {
-            GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);
-            return;
-        }
-
-        const auto arraySlice = pDstSurface->arrayIndex + renderTargetArrayIndex;
-        uint8_t* pTileDst = (uint8_t*)ComputeSurfaceAddress<false, false>(
-            x, y, arraySlice, arraySlice, sampleNum, pDstSurface->lod, pDstSurface);
-
-        // dx: bytes between horizontally adjacent simd16 tiles.
-        // dy: bytes to the next band of simd16 tiles, less the dx steps already taken.
-        const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL;
-        const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch - KNOB_TILE_X_DIM * DST_BYTES_PER_PIXEL;
-
-        uint8_t* ppDsts[] =
-        {
-            pTileDst,                                  // row 0, col 0
-            pTileDst + pDstSurface->pitch,             // row 1, col 0
-            pTileDst + dx / 2,                         // row 0, col 1
-            pTileDst + pDstSurface->pitch + dx / 2     // row 1, col 1
-        };
-
-        for (uint32_t tileY = 0; tileY < KNOB_TILE_Y_DIM; tileY += SIMD16_TILE_Y_DIM)
-        {
-            for (uint32_t tileX = 0; tileX < KNOB_TILE_X_DIM; tileX += SIMD16_TILE_X_DIM)
-            {
-                ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);
-                pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;
-
-                for (uint8_t*& pCursor : ppDsts)
-                {
-                    pCursor += dx;
-                }
-            }
-
-            for (uint8_t*& pCursor : ppDsts)
-            {
-                pCursor += dy;
-            }
-        }
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// OptStoreRasterTile - SWR_TILE_NONE specialization for 32bpp
-//////////////////////////////////////////////////////////////////////////
-template <SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct OptStoreRasterTile<TilingTraits<SWR_TILE_NONE, 32>, SrcFormat, DstFormat>
-{
-    typedef StoreRasterTile<TilingTraits<SWR_TILE_NONE, 32>, SrcFormat, DstFormat> GenericStoreTile;
-    static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;
-    static const size_t DST_BYTES_PER_PIXEL = FormatTraits<DstFormat>::bpp / 8;
-
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Stores a raster tile to a pitch-addressed destination surface,
-    ///        converting one simd16 tile per ConvertPixelsSOAtoAOS call.
-    /// @param pSrc - Pointer to raster tile.
-    /// @param pDstSurface - Destination surface state
-    /// @param x, y - Pixel coordinates of the start of the raster tile.
-    /// @param sampleNum - Sample index passed to ComputeSurfaceAddress.
-    /// @param renderTargetArrayIndex - Added to the surface's arrayIndex.
-    INLINE static void Store(
-        uint8_t* pSrc,
-        SWR_SURFACE_STATE* pDstSurface,
-        uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)
-    {
-        const uint32_t lodWidth  = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
-        const uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
-
-        // Tiles crossing the right/bottom edge use the bounds-checked generic store.
-        const bool fullTile = (x + KNOB_TILE_X_DIM <= lodWidth) && (y + KNOB_TILE_Y_DIM <= lodHeight);
-        if (!fullTile)
-        {
-            GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);
-            return;
-        }
-
-        const auto arraySlice = pDstSurface->arrayIndex + renderTargetArrayIndex;
-        uint8_t* pTileDst = (uint8_t*)ComputeSurfaceAddress<false, false>(
-            x, y, arraySlice, arraySlice, sampleNum, pDstSurface->lod, pDstSurface);
-
-        // dx: bytes between horizontally adjacent simd16 tiles.
-        // dy: bytes to the next band of simd16 tiles, less the dx steps already taken.
-        const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL;
-        const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch - KNOB_TILE_X_DIM * DST_BYTES_PER_PIXEL;
-
-        uint8_t* ppDsts[] =
-        {
-            pTileDst,                                  // row 0, col 0
-            pTileDst + pDstSurface->pitch,             // row 1, col 0
-            pTileDst + dx / 2,                         // row 0, col 1
-            pTileDst + pDstSurface->pitch + dx / 2     // row 1, col 1
-        };
-
-        for (uint32_t tileY = 0; tileY < KNOB_TILE_Y_DIM; tileY += SIMD16_TILE_Y_DIM)
-        {
-            for (uint32_t tileX = 0; tileX < KNOB_TILE_X_DIM; tileX += SIMD16_TILE_X_DIM)
-            {
-                ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);
-                pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;
-
-                for (uint8_t*& pCursor : ppDsts)
-                {
-                    pCursor += dx;
-                }
-            }
-
-            for (uint8_t*& pCursor : ppDsts)
-            {
-                pCursor += dy;
-            }
-        }
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// OptStoreRasterTile - SWR_TILE_NONE specialization for 64bpp
-//////////////////////////////////////////////////////////////////////////
-template <SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct OptStoreRasterTile<TilingTraits<SWR_TILE_NONE, 64>, SrcFormat, DstFormat>
-{
-    typedef StoreRasterTile<TilingTraits<SWR_TILE_NONE, 64>, SrcFormat, DstFormat> GenericStoreTile;
-    static const size_t SRC_BYTES_PER_PIXEL  = FormatTraits<SrcFormat>::bpp / 8;
-    static const size_t DST_BYTES_PER_PIXEL  = FormatTraits<DstFormat>::bpp / 8;
-    static const size_t MAX_DST_COLUMN_BYTES = 16;
-
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Stores a raster tile to a pitch-addressed destination surface,
-    ///        handing ConvertPixelsSOAtoAOS eight 16-byte destination spans
-    ///        (two rows by four columns) per simd16 tile.
-    /// @param pSrc - Pointer to raster tile.
-    /// @param pDstSurface - Destination surface state
-    /// @param x, y - Pixel coordinates of the start of the raster tile.
-    /// @param sampleNum - Sample index passed to ComputeSurfaceAddress.
-    /// @param renderTargetArrayIndex - Added to the surface's arrayIndex.
-    INLINE static void Store(
-        uint8_t* pSrc,
-        SWR_SURFACE_STATE* pDstSurface,
-        uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)
-    {
-        // Raster tile width is same as simd16 tile width
-        static_assert(KNOB_TILE_X_DIM == SIMD16_TILE_X_DIM, "Invalid tile x dim");
-
-        const uint32_t lodWidth  = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
-        const uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
-
-        // Tiles crossing the right/bottom edge use the bounds-checked generic store.
-        const bool fullTile = (x + KNOB_TILE_X_DIM <= lodWidth) && (y + KNOB_TILE_Y_DIM <= lodHeight);
-        if (!fullTile)
-        {
-            GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);
-            return;
-        }
-
-        const auto arraySlice = pDstSurface->arrayIndex + renderTargetArrayIndex;
-        uint8_t* pTileDst = (uint8_t*)ComputeSurfaceAddress<false, false>(
-            x, y, arraySlice, arraySlice, sampleNum, pDstSurface->lod, pDstSurface);
-
-        const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL;
-        const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch;
-
-        // we have to break these large spans up, since ConvertPixelsSOAtoAOS() can only work on max 16B spans (a TileY limitation)
-        static_assert(dx == MAX_DST_COLUMN_BYTES * 4, "Invalid column offsets");
-
-        uint8_t* ppDsts[] =
-        {
-            pTileDst,                                                   // row 0, col 0
-            pTileDst + pDstSurface->pitch,                              // row 1, col 0
-            pTileDst + MAX_DST_COLUMN_BYTES,                            // row 0, col 1
-            pTileDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES,       // row 1, col 1
-            pTileDst + MAX_DST_COLUMN_BYTES * 2,                        // row 0, col 2
-            pTileDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 2,   // row 1, col 2
-            pTileDst + MAX_DST_COLUMN_BYTES * 3,                        // row 0, col 3
-            pTileDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 3    // row 1, col 3
-        };
-
-        for (uint32_t tileY = 0; tileY < KNOB_TILE_Y_DIM; tileY += SIMD16_TILE_Y_DIM)
-        {
-            ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);
-            pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;
-
-            // Step every destination span down to the next simd16 tile.
-            for (uint8_t*& pCursor : ppDsts)
-            {
-                pCursor += dy;
-            }
-        }
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// OptStoreRasterTile - SWR_TILE_NONE specialization for 128bpp
-//////////////////////////////////////////////////////////////////////////
-template <SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct OptStoreRasterTile<TilingTraits<SWR_TILE_NONE, 128>, SrcFormat, DstFormat>
-{
-    typedef StoreRasterTile<TilingTraits<SWR_TILE_NONE, 128>, SrcFormat, DstFormat> GenericStoreTile;
-    static const size_t SRC_BYTES_PER_PIXEL  = FormatTraits<SrcFormat>::bpp / 8;
-    static const size_t DST_BYTES_PER_PIXEL  = FormatTraits<DstFormat>::bpp / 8;
-    static const size_t MAX_DST_COLUMN_BYTES = 16;
-
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Stores a raster tile to a pitch-addressed destination surface,
-    ///        handing ConvertPixelsSOAtoAOS sixteen 16-byte destination spans
-    ///        (two rows by eight columns) per simd16 tile.
-    /// @param pSrc - Pointer to raster tile.
-    /// @param pDstSurface - Destination surface state
-    /// @param x, y - Pixel coordinates of the start of the raster tile.
-    /// @param sampleNum - Sample index passed to ComputeSurfaceAddress.
-    /// @param renderTargetArrayIndex - Added to the surface's arrayIndex.
-    INLINE static void Store(
-        uint8_t* pSrc,
-        SWR_SURFACE_STATE* pDstSurface,
-        uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)
-    {
-        // Raster tile width is same as simd16 tile width
-        static_assert(KNOB_TILE_X_DIM == SIMD16_TILE_X_DIM, "Invalid tile x dim");
-
-        const uint32_t lodWidth  = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
-        const uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
-
-        // Tiles crossing the right/bottom edge use the bounds-checked generic store.
-        const bool fullTile = (x + KNOB_TILE_X_DIM <= lodWidth) && (y + KNOB_TILE_Y_DIM <= lodHeight);
-        if (!fullTile)
-        {
-            GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);
-            return;
-        }
-
-        const auto arraySlice = pDstSurface->arrayIndex + renderTargetArrayIndex;
-        uint8_t* pTileDst = (uint8_t*)ComputeSurfaceAddress<false, false>(
-            x, y, arraySlice, arraySlice, sampleNum, pDstSurface->lod, pDstSurface);
-
-        const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL;
-        const uint32_t dy = SIMD16_TILE_Y_DIM * pDstSurface->pitch;
-
-        // we have to break these large spans up, since ConvertPixelsSOAtoAOS() can only work on max 16B spans (a TileY limitation)
-        static_assert(dx == MAX_DST_COLUMN_BYTES * 8, "Invalid column offsets");
-
-        uint8_t* ppDsts[] =
-        {
-            pTileDst,                                                   // row 0, col 0
-            pTileDst + pDstSurface->pitch,                              // row 1, col 0
-            pTileDst + MAX_DST_COLUMN_BYTES,                            // row 0, col 1
-            pTileDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES,       // row 1, col 1
-            pTileDst + MAX_DST_COLUMN_BYTES * 2,                        // row 0, col 2
-            pTileDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 2,   // row 1, col 2
-            pTileDst + MAX_DST_COLUMN_BYTES * 3,                        // row 0, col 3
-            pTileDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 3,   // row 1, col 3
-            pTileDst + MAX_DST_COLUMN_BYTES * 4,                        // row 0, col 4
-            pTileDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 4,   // row 1, col 4
-            pTileDst + MAX_DST_COLUMN_BYTES * 5,                        // row 0, col 5
-            pTileDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 5,   // row 1, col 5
-            pTileDst + MAX_DST_COLUMN_BYTES * 6,                        // row 0, col 6
-            pTileDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 6,   // row 1, col 6
-            pTileDst + MAX_DST_COLUMN_BYTES * 7,                        // row 0, col 7
-            pTileDst + pDstSurface->pitch + MAX_DST_COLUMN_BYTES * 7,   // row 1, col 7
-        };
-
-        for (uint32_t tileY = 0; tileY < KNOB_TILE_Y_DIM; tileY += SIMD16_TILE_Y_DIM)
-        {
-            ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);
-            pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;
-
-            // Step every destination span down to the next simd16 tile.
-            for (uint8_t*& pCursor : ppDsts)
-            {
-                pCursor += dy;
-            }
-        }
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// OptStoreRasterTile - SWR_TILE_MODE_YMAJOR specialization for 8bpp
-//////////////////////////////////////////////////////////////////////////
-template <SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct OptStoreRasterTile<TilingTraits<SWR_TILE_MODE_YMAJOR, 8>, SrcFormat, DstFormat>
-{
-    typedef StoreRasterTile<TilingTraits<SWR_TILE_MODE_YMAJOR, 8>, SrcFormat, DstFormat> GenericStoreTile;
-    static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;
-
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Stores a raster tile to a TileY destination surface, one
-    ///        simd16 tile per ConvertPixelsSOAtoAOS call.
-    /// @param pSrc - Pointer to raster tile.
-    /// @param pDstSurface - Destination surface state
-    /// @param x, y - Pixel coordinates of the start of the raster tile.
-    /// @param sampleNum - Sample index passed to ComputeSurfaceAddress.
-    /// @param renderTargetArrayIndex - Added to the surface's arrayIndex.
-    INLINE static void Store(
-        uint8_t* pSrc,
-        SWR_SURFACE_STATE* pDstSurface,
-        uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)
-    {
-        static const uint32_t DestRowWidthBytes = 16; // 16B rows
-
-        // Raster tile width is same as simd16 tile width
-        static_assert(KNOB_TILE_X_DIM == SIMD16_TILE_X_DIM, "Invalid tile x dim");
-
-        const uint32_t lodWidth  = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
-        const uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
-
-        // Tiles crossing the right/bottom edge use the bounds-checked generic store.
-        const bool fullTile = (x + KNOB_TILE_X_DIM <= lodWidth) && (y + KNOB_TILE_Y_DIM <= lodHeight);
-        if (!fullTile)
-        {
-            GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);
-            return;
-        }
-
-        // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.
-        // The per-span offsets are computed once here and advanced in the loop below.
-        // There will be 4 8x2 simd tiles in an 8x8 raster tile.
-        const auto arraySlice = pDstSurface->arrayIndex + renderTargetArrayIndex;
-        uint8_t* pTileDst = (uint8_t*)ComputeSurfaceAddress<false, false>(
-            x, y, arraySlice, arraySlice, sampleNum, pDstSurface->lod, pDstSurface);
-
-        const uint32_t dy = SIMD16_TILE_Y_DIM * DestRowWidthBytes;
-
-        // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern.
-        uint8_t* ppDsts[] =
-        {
-            pTileDst,
-            pTileDst + DestRowWidthBytes,
-            pTileDst + DestRowWidthBytes / 4,
-            pTileDst + DestRowWidthBytes + DestRowWidthBytes / 4
-        };
-
-        for (uint32_t tileY = 0; tileY < KNOB_TILE_Y_DIM; tileY += SIMD16_TILE_Y_DIM)
-        {
-            ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);
-            pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;
-
-            for (uint8_t*& pCursor : ppDsts)
-            {
-                pCursor += dy;
-            }
-        }
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// OptStoreRasterTile - SWR_TILE_MODE_YMAJOR specialization for 16bpp
-//////////////////////////////////////////////////////////////////////////
-template <SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct OptStoreRasterTile<TilingTraits<SWR_TILE_MODE_YMAJOR, 16>, SrcFormat, DstFormat>
-{
-    typedef StoreRasterTile<TilingTraits<SWR_TILE_MODE_YMAJOR, 16>, SrcFormat, DstFormat> GenericStoreTile;
-    static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;
-
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Stores a raster tile to a TileY destination surface, one
-    ///        simd16 tile per ConvertPixelsSOAtoAOS call.
-    /// @param pSrc - Pointer to raster tile.
-    /// @param pDstSurface - Destination surface state
-    /// @param x, y - Pixel coordinates of the start of the raster tile.
-    /// @param sampleNum - Sample index passed to ComputeSurfaceAddress.
-    /// @param renderTargetArrayIndex - Added to the surface's arrayIndex.
-    INLINE static void Store(
-        uint8_t* pSrc,
-        SWR_SURFACE_STATE* pDstSurface,
-        uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)
-    {
-        static const uint32_t DestRowWidthBytes = 16; // 16B rows
-
-        // Raster tile width is same as simd16 tile width
-        static_assert(KNOB_TILE_X_DIM == SIMD16_TILE_X_DIM, "Invalid tile x dim");
-
-        const uint32_t lodWidth  = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
-        const uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
-
-        // Tiles crossing the right/bottom edge use the bounds-checked generic store.
-        const bool fullTile = (x + KNOB_TILE_X_DIM <= lodWidth) && (y + KNOB_TILE_Y_DIM <= lodHeight);
-        if (!fullTile)
-        {
-            GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);
-            return;
-        }
-
-        // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.
-        // The per-span offsets are computed once here and advanced in the loop below.
-        // There will be 4 8x2 simd tiles in an 8x8 raster tile.
-        const auto arraySlice = pDstSurface->arrayIndex + renderTargetArrayIndex;
-        uint8_t* pTileDst = (uint8_t*)ComputeSurfaceAddress<false, false>(
-            x, y, arraySlice, arraySlice, sampleNum, pDstSurface->lod, pDstSurface);
-
-        const uint32_t dy = SIMD16_TILE_Y_DIM * DestRowWidthBytes;
-
-        // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern.
-        uint8_t* ppDsts[] =
-        {
-            pTileDst,
-            pTileDst + DestRowWidthBytes,
-            pTileDst + DestRowWidthBytes / 2,
-            pTileDst + DestRowWidthBytes + DestRowWidthBytes / 2
-        };
-
-        for (uint32_t tileY = 0; tileY < KNOB_TILE_Y_DIM; tileY += SIMD16_TILE_Y_DIM)
-        {
-            ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);
-            pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;
-
-            for (uint8_t*& pCursor : ppDsts)
-            {
-                pCursor += dy;
-            }
-        }
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// OptStoreRasterTile - SWR_TILE_MODE_XMAJOR specialization for 32bpp
-//////////////////////////////////////////////////////////////////////////
-template <SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct OptStoreRasterTile<TilingTraits<SWR_TILE_MODE_XMAJOR, 32>, SrcFormat, DstFormat>
-{
-    typedef StoreRasterTile<TilingTraits<SWR_TILE_MODE_XMAJOR, 32>, SrcFormat, DstFormat> GenericStoreTile;
-    static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;
-    static const size_t DST_BYTES_PER_PIXEL = FormatTraits<DstFormat>::bpp / 8;
-
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Stores a raster tile to a TileX destination surface, one
-    ///        simd16 tile per ConvertPixelsSOAtoAOS call.
-    /// @param pSrc - Pointer to raster tile.
-    /// @param pDstSurface - Destination surface state
-    /// @param x, y - Pixel coordinates of the start of the raster tile.
-    /// @param sampleNum - Sample index passed to ComputeSurfaceAddress.
-    /// @param renderTargetArrayIndex - Added to the surface's arrayIndex.
-    INLINE static void Store(
-        uint8_t* pSrc,
-        SWR_SURFACE_STATE* pDstSurface,
-        uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)
-    {
-        static const uint32_t DestRowWidthBytes = 512; // 512B rows
-
-        const uint32_t lodWidth  = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
-        const uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
-
-        // Tiles crossing the right/bottom edge use the bounds-checked generic store.
-        const bool fullTile = (x + KNOB_TILE_X_DIM <= lodWidth) && (y + KNOB_TILE_Y_DIM <= lodHeight);
-        if (!fullTile)
-        {
-            GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);
-            return;
-        }
-
-        // TileX is a row-major tiling mode where each 4KB tile consist of 8 x 512B rows.
-        // The per-span offsets are computed once here and advanced in the loops below.
-        const auto arraySlice = pDstSurface->arrayIndex + renderTargetArrayIndex;
-        uint8_t* pTileDst = (uint8_t*)ComputeSurfaceAddress<false, false>(
-            x, y, arraySlice, arraySlice, sampleNum, pDstSurface->lod, pDstSurface);
-
-        // dx: bytes between horizontally adjacent simd16 tiles.
-        // dy: bytes to the next band of simd16 tiles, less the dx steps already taken.
-        const uint32_t dx = SIMD16_TILE_X_DIM * DST_BYTES_PER_PIXEL;
-        const uint32_t dy = SIMD16_TILE_Y_DIM * DestRowWidthBytes - KNOB_TILE_X_DIM * DST_BYTES_PER_PIXEL;
-
-        uint8_t* ppDsts[] =
-        {
-            pTileDst,                                 // row 0, col 0
-            pTileDst + DestRowWidthBytes,             // row 1, col 0
-            pTileDst + dx / 2,                        // row 0, col 1
-            pTileDst + DestRowWidthBytes + dx / 2     // row 1, col 1
-        };
-
-        for (uint32_t tileY = 0; tileY < KNOB_TILE_Y_DIM; tileY += SIMD16_TILE_Y_DIM)
-        {
-            for (uint32_t tileX = 0; tileX < KNOB_TILE_X_DIM; tileX += SIMD16_TILE_X_DIM)
-            {
-                ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);
-                pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;
-
-                for (uint8_t*& pCursor : ppDsts)
-                {
-                    pCursor += dx;
-                }
-            }
-
-            for (uint8_t*& pCursor : ppDsts)
-            {
-                pCursor += dy;
-            }
-        }
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// OptStoreRasterTile - SWR_TILE_MODE_YMAJOR specialization for 32bpp
-//////////////////////////////////////////////////////////////////////////
-template <SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct OptStoreRasterTile<TilingTraits<SWR_TILE_MODE_YMAJOR, 32>, SrcFormat, DstFormat>
-{
-    typedef StoreRasterTile<TilingTraits<SWR_TILE_MODE_YMAJOR, 32>, SrcFormat, DstFormat> GenericStoreTile;
-    static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;
-
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Stores a raster tile to a TileY destination surface, one
-    ///        simd16 tile per ConvertPixelsSOAtoAOS call, split across two
-    ///        16-byte-wide tile columns.
-    /// @param pSrc - Pointer to raster tile.
-    /// @param pDstSurface - Destination surface state
-    /// @param x, y - Pixel coordinates of the start of the raster tile.
-    /// @param sampleNum - Sample index passed to ComputeSurfaceAddress.
-    /// @param renderTargetArrayIndex - Added to the surface's arrayIndex.
-    INLINE static void Store(
-        uint8_t* pSrc,
-        SWR_SURFACE_STATE* pDstSurface,
-        uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)
-    {
-        static const uint32_t DestRowWidthBytes = 16;                     // 16B rows
-        static const uint32_t DestColumnBytes   = DestRowWidthBytes * 32; // 16B x 32 rows.
-
-        // Raster tile width is same as simd16 tile width
-        static_assert(KNOB_TILE_X_DIM == SIMD16_TILE_X_DIM, "Invalid tile x dim");
-
-        const uint32_t lodWidth  = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
-        const uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
-
-        // Tiles crossing the right/bottom edge use the bounds-checked generic store.
-        const bool fullTile = (x + KNOB_TILE_X_DIM <= lodWidth) && (y + KNOB_TILE_Y_DIM <= lodHeight);
-        if (!fullTile)
-        {
-            GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);
-            return;
-        }
-
-        // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.
-        // The per-span offsets are computed once here and advanced in the loop below.
-        // There will be 4 8x2 simd tiles in an 8x8 raster tile.
-        const auto arraySlice = pDstSurface->arrayIndex + renderTargetArrayIndex;
-        uint8_t* pTileDst = (uint8_t*)ComputeSurfaceAddress<false, false>(
-            x, y, arraySlice, arraySlice, sampleNum, pDstSurface->lod, pDstSurface);
-
-        const uint32_t dy = SIMD16_TILE_Y_DIM * DestRowWidthBytes;
-
-        // we have to break these large spans up, since ConvertPixelsSOAtoAOS() can only work on max 16B spans (a TileY limitation)
-        // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern.
-        uint8_t* ppDsts[] =
-        {
-            pTileDst,                                          // row 0, col 0
-            pTileDst + DestRowWidthBytes,                      // row 1, col 0
-            pTileDst + DestColumnBytes,                        // row 0, col 1
-            pTileDst + DestRowWidthBytes + DestColumnBytes     // row 1, col 1
-        };
-
-        for (uint32_t tileY = 0; tileY < KNOB_TILE_Y_DIM; tileY += SIMD16_TILE_Y_DIM)
-        {
-            ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);
-            pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;
-
-            for (uint8_t*& pCursor : ppDsts)
-            {
-                pCursor += dy;
-            }
-        }
-    }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// OptStoreRasterTile - TILE_MODE_YMAJOR specialization for 64bpp
-//////////////////////////////////////////////////////////////////////////
-template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 64>, SrcFormat, DstFormat>
-{
- typedef StoreRasterTile<TilingTraits<SWR_TILE_MODE_YMAJOR, 64>, SrcFormat, DstFormat> GenericStoreTile;
- static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Stores an 8x8 raster tile to the destination surface.
- /// @param pSrc - Pointer to raster tile.
- /// @param pDstSurface - Destination surface state
- /// @param x, y - Coordinates to raster tile.
- INLINE static void Store(
- uint8_t *pSrc,
- SWR_SURFACE_STATE* pDstSurface,
- uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)
- {
- static const uint32_t DestRowWidthBytes = 16; // 16B rows
- static const uint32_t DestColumnBytes = DestRowWidthBytes * 32; // 16B x 32 rows.
-
- // Punt non-full tiles to generic store
- uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
- uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
-
- if (x + KNOB_TILE_X_DIM > lodWidth || y + KNOB_TILE_Y_DIM > lodHeight)
- {
- return GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);
- }
-
- // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.
- // We can compute the offsets to each column within the raster tile once and increment from these.
- // There will be 4 8x2 simd tiles in an 8x8 raster tile.
- uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,
- pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface);
-
- // we have to break these large spans up, since ConvertPixelsSOAtoAOS() can only work on max 16B spans (a TileY limitation)
- const uint32_t dy = SIMD16_TILE_Y_DIM * DestRowWidthBytes;
-
- // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern.
- uint8_t *ppDsts[] =
- {
- pDst, // row 0, col 0
- pDst + DestRowWidthBytes, // row 1, col 0
- pDst + DestColumnBytes, // row 0, col 1
- pDst + DestRowWidthBytes + DestColumnBytes, // row 1, col 1
- pDst + DestColumnBytes * 2, // row 0, col 2
- pDst + DestRowWidthBytes + DestColumnBytes * 2, // row 1, col 2
- pDst + DestColumnBytes * 3, // row 0, col 3
- pDst + DestRowWidthBytes + DestColumnBytes * 3 // row 1, col 3
- };
-
- for (uint32_t yy = 0; yy < KNOB_TILE_Y_DIM; yy += SIMD16_TILE_Y_DIM)
- {
- // Raster tile width is same as simd16 tile width
- static_assert(KNOB_TILE_X_DIM == SIMD16_TILE_X_DIM, "Invalid tile x dim");
-
- ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);
-
- pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;
-
- for (uint32_t i = 0; i < ARRAY_SIZE(ppDsts); i += 1)
- {
- ppDsts[i] += dy;
- }
- }
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// OptStoreRasterTile - SWR_TILE_MODE_YMAJOR specialization for 128bpp
-//////////////////////////////////////////////////////////////////////////
-template<SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct OptStoreRasterTile< TilingTraits<SWR_TILE_MODE_YMAJOR, 128>, SrcFormat, DstFormat>
-{
- typedef StoreRasterTile<TilingTraits<SWR_TILE_MODE_YMAJOR, 128>, SrcFormat, DstFormat> GenericStoreTile;
- static const size_t SRC_BYTES_PER_PIXEL = FormatTraits<SrcFormat>::bpp / 8;
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Stores an 8x8 raster tile to the destination surface.
- /// @param pSrc - Pointer to raster tile.
- /// @param pDstSurface - Destination surface state
- /// @param x, y - Coordinates to raster tile.
- INLINE static void Store(
- uint8_t *pSrc,
- SWR_SURFACE_STATE* pDstSurface,
- uint32_t x, uint32_t y, uint32_t sampleNum, uint32_t renderTargetArrayIndex)
- {
- static const uint32_t DestRowWidthBytes = 16; // 16B rows
- static const uint32_t DestColumnBytes = DestRowWidthBytes * 32; // 16B x 32 rows.
-
- // Punt non-full tiles to generic store
- uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
- uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
-
- if (x + KNOB_TILE_X_DIM > lodWidth || y + KNOB_TILE_Y_DIM > lodHeight)
- {
- return GenericStoreTile::Store(pSrc, pDstSurface, x, y, sampleNum, renderTargetArrayIndex);
- }
-
- // TileY is a column-major tiling mode where each 4KB tile consist of 8 columns of 32 x 16B rows.
- // We can compute the offsets to each column within the raster tile once and increment from these.
- // There will be 4 8x2 simd tiles in an 8x8 raster tile.
- uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,
- pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface);
-
- // we have to break these large spans up, since ConvertPixelsSOAtoAOS() can only work on max 16B spans (a TileY limitation)
- const uint32_t dy = SIMD16_TILE_Y_DIM * DestRowWidthBytes;
-
- // The Hot Tile uses a row-major tiling mode and has a larger memory footprint. So we iterate in a row-major pattern.
- uint8_t *ppDsts[] =
- {
- pDst, // row 0, col 0
- pDst + DestRowWidthBytes, // row 1, col 0
- pDst + DestColumnBytes, // row 0, col 1
- pDst + DestRowWidthBytes + DestColumnBytes, // row 1, col 1
- pDst + DestColumnBytes * 2, // row 0, col 2
- pDst + DestRowWidthBytes + DestColumnBytes * 2, // row 1, col 2
- pDst + DestColumnBytes * 3, // row 0, col 3
- pDst + DestRowWidthBytes + DestColumnBytes * 3, // row 1, col 3
- pDst + DestColumnBytes * 4, // row 0, col 4
- pDst + DestRowWidthBytes + DestColumnBytes * 4, // row 1, col 4
- pDst + DestColumnBytes * 5, // row 0, col 5
- pDst + DestRowWidthBytes + DestColumnBytes * 5, // row 1, col 5
- pDst + DestColumnBytes * 6, // row 0, col 6
- pDst + DestRowWidthBytes + DestColumnBytes * 6, // row 1, col 6
- pDst + DestColumnBytes * 7, // row 0, col 7
- pDst + DestRowWidthBytes + DestColumnBytes * 7 // row 1, col 7
- };
-
- for (uint32_t yy = 0; yy < KNOB_TILE_Y_DIM; yy += SIMD16_TILE_Y_DIM)
- {
- // Raster tile width is same as simd16 tile width
- static_assert(KNOB_TILE_X_DIM == SIMD16_TILE_X_DIM, "Invalid tile x dim");
-
- ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ppDsts);
-
- pSrc += KNOB_SIMD16_WIDTH * SRC_BYTES_PER_PIXEL;
-
- for (uint32_t i = 0; i < ARRAY_SIZE(ppDsts); i += 1)
- {
- ppDsts[i] += dy;
- }
- }
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// StoreMacroTile - Stores a macro tile which consists of raster tiles.
-//////////////////////////////////////////////////////////////////////////
-template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
-struct StoreMacroTile
-{
- //////////////////////////////////////////////////////////////////////////
- /// @brief Stores a macrotile to the destination surface using safe implementation.
- /// @param pSrc - Pointer to macro tile.
- /// @param pDstSurface - Destination surface state
- /// @param x, y - Coordinates to macro tile
- static void StoreGeneric(
- uint8_t *pSrcHotTile,
- SWR_SURFACE_STATE* pDstSurface,
- uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex)
- {
- PFN_STORE_TILES_INTERNAL pfnStore;
- pfnStore = StoreRasterTile<TTraits, SrcFormat, DstFormat>::Store;
-
- // Store each raster tile from the hot tile to the destination surface.
- for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
- {
- for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
- {
- for (uint32_t sampleNum = 0; sampleNum < pDstSurface->numSamples; sampleNum++)
- {
- pfnStore(pSrcHotTile, pDstSurface, (x + col), (y + row), sampleNum, renderTargetArrayIndex);
- pSrcHotTile += KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<SrcFormat>::bpp / 8);
- }
- }
- }
-
- }
-
- typedef void(*PFN_STORE_TILES_INTERNAL)(uint8_t*, SWR_SURFACE_STATE*, uint32_t, uint32_t, uint32_t, uint32_t);
- //////////////////////////////////////////////////////////////////////////
- /// @brief Stores a macrotile to the destination surface.
- /// @param pSrc - Pointer to macro tile.
- /// @param pDstSurface - Destination surface state
- /// @param x, y - Coordinates to macro tile
- static void Store(
- uint8_t *pSrcHotTile,
- SWR_SURFACE_STATE* pDstSurface,
- uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex)
- {
- PFN_STORE_TILES_INTERNAL pfnStore[SWR_MAX_NUM_MULTISAMPLES];
-
- for (uint32_t sampleNum = 0; sampleNum < pDstSurface->numSamples; sampleNum++)
- {
- size_t dstSurfAddress = (size_t)ComputeSurfaceAddress<false, false>(
- 0,
- 0,
- pDstSurface->arrayIndex + renderTargetArrayIndex, // z for 3D surfaces
- pDstSurface->arrayIndex + renderTargetArrayIndex, // array index for 2D arrays
- sampleNum,
- pDstSurface->lod,
- pDstSurface);
-
- // Only support generic store-tile if lod surface doesn't start on a page boundary and is non-linear
- bool bForceGeneric = ((pDstSurface->tileMode != SWR_TILE_NONE) && (0 != (dstSurfAddress & 0xfff))) ||
- (pDstSurface->bInterleavedSamples);
-
- pfnStore[sampleNum] = (bForceGeneric || KNOB_USE_GENERIC_STORETILE) ? StoreRasterTile<TTraits, SrcFormat, DstFormat>::Store : OptStoreRasterTile<TTraits, SrcFormat, DstFormat>::Store;
- }
-
- // Save original for pSrcHotTile resolve.
- uint8_t *pResolveSrcHotTile = pSrcHotTile;
-
- // Store each raster tile from the hot tile to the destination surface.
- for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
- {
- for(uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
- {
- for(uint32_t sampleNum = 0; sampleNum < pDstSurface->numSamples; sampleNum++)
- {
- pfnStore[sampleNum](pSrcHotTile, pDstSurface, (x + col), (y + row), sampleNum, renderTargetArrayIndex);
- pSrcHotTile += KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<SrcFormat>::bpp / 8);
- }
- }
- }
-
- if (pDstSurface->xpAuxBaseAddress)
- {
- uint32_t sampleOffset = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<SrcFormat>::bpp / 8);
- // Store each raster tile from the hot tile to the destination surface.
- for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
- {
- for(uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
- {
- StoreRasterTile<TTraits, SrcFormat, DstFormat>::Resolve(pResolveSrcHotTile, pDstSurface, (x + col), (y + row), sampleOffset, renderTargetArrayIndex);
- pResolveSrcHotTile += sampleOffset * pDstSurface->numSamples;
- }
- }
- }
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// InitStoreTilesTable - Helper for setting up the tables.
-template <SWR_TILE_MODE TTileMode, size_t NumTileModesT, size_t ArraySizeT>
-void InitStoreTilesTableColor_Half1(
- PFN_STORE_TILES (&table)[NumTileModesT][ArraySizeT])
-{
- table[TTileMode][R32G32B32A32_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_FLOAT, R32G32B32A32_FLOAT>::Store;
- table[TTileMode][R32G32B32A32_SINT] = StoreMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_FLOAT, R32G32B32A32_SINT>::Store;
- table[TTileMode][R32G32B32A32_UINT] = StoreMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_FLOAT, R32G32B32A32_UINT>::Store;
- table[TTileMode][R32G32B32X32_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_FLOAT, R32G32B32X32_FLOAT>::Store;
- table[TTileMode][R32G32B32A32_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_FLOAT, R32G32B32A32_SSCALED>::Store;
- table[TTileMode][R32G32B32A32_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 128>, R32G32B32A32_FLOAT, R32G32B32A32_USCALED>::Store;
- table[TTileMode][R32G32B32_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 96>, R32G32B32A32_FLOAT, R32G32B32_FLOAT>::Store;
- table[TTileMode][R32G32B32_SINT] = StoreMacroTile<TilingTraits<TTileMode, 96>, R32G32B32A32_FLOAT, R32G32B32_SINT>::Store;
- table[TTileMode][R32G32B32_UINT] = StoreMacroTile<TilingTraits<TTileMode, 96>, R32G32B32A32_FLOAT, R32G32B32_UINT>::Store;
- table[TTileMode][R32G32B32_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 96>, R32G32B32A32_FLOAT, R32G32B32_SSCALED>::Store;
- table[TTileMode][R32G32B32_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 96>, R32G32B32A32_FLOAT, R32G32B32_USCALED>::Store;
- table[TTileMode][R16G16B16A16_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16A16_UNORM>::Store;
- table[TTileMode][R16G16B16A16_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16A16_SNORM>::Store;
- table[TTileMode][R16G16B16A16_SINT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16A16_SINT>::Store;
- table[TTileMode][R16G16B16A16_UINT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16A16_UINT>::Store;
- table[TTileMode][R16G16B16A16_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16A16_FLOAT>::Store;
- table[TTileMode][R32G32_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R32G32_FLOAT>::Store;
- table[TTileMode][R32G32_SINT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R32G32_SINT>::Store;
- table[TTileMode][R32G32_UINT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R32G32_UINT>::Store;
- table[TTileMode][R32_FLOAT_X8X24_TYPELESS] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R32_FLOAT_X8X24_TYPELESS>::Store;
- table[TTileMode][X32_TYPELESS_G8X24_UINT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, X32_TYPELESS_G8X24_UINT>::Store;
- table[TTileMode][R16G16B16X16_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16X16_UNORM>::Store;
- table[TTileMode][R16G16B16X16_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16X16_FLOAT>::Store;
- table[TTileMode][R16G16B16A16_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16A16_SSCALED>::Store;
- table[TTileMode][R16G16B16A16_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R16G16B16A16_USCALED>::Store;
- table[TTileMode][R32G32_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R32G32_SSCALED>::Store;
- table[TTileMode][R32G32_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32G32B32A32_FLOAT, R32G32_USCALED>::Store;
- table[TTileMode][B8G8R8A8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B8G8R8A8_UNORM>::Store;
- table[TTileMode][B8G8R8A8_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B8G8R8A8_UNORM_SRGB>::Store;
- table[TTileMode][R10G10B10A2_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10A2_UNORM>::StoreGeneric;
- table[TTileMode][R10G10B10A2_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10A2_UNORM_SRGB>::StoreGeneric;
- table[TTileMode][R10G10B10A2_UINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10A2_UINT>::StoreGeneric;
- table[TTileMode][R8G8B8A8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8A8_UNORM>::Store;
- table[TTileMode][R8G8B8A8_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8A8_UNORM_SRGB>::Store;
- table[TTileMode][R8G8B8A8_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8A8_SNORM>::Store;
- table[TTileMode][R8G8B8A8_SINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8A8_SINT>::Store;
- table[TTileMode][R8G8B8A8_UINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8A8_UINT>::Store;
- table[TTileMode][R16G16_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R16G16_UNORM>::Store;
- table[TTileMode][R16G16_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R16G16_SNORM>::Store;
- table[TTileMode][R16G16_SINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R16G16_SINT>::Store;
- table[TTileMode][R16G16_UINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R16G16_UINT>::Store;
- table[TTileMode][R16G16_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R16G16_FLOAT>::Store;
- table[TTileMode][B10G10R10A2_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10A2_UNORM>::StoreGeneric;
- table[TTileMode][B10G10R10A2_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10A2_UNORM_SRGB>::StoreGeneric;
- table[TTileMode][R11G11B10_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R11G11B10_FLOAT>::StoreGeneric;
- table[TTileMode][R10G10B10_FLOAT_A2_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10_FLOAT_A2_UNORM>::StoreGeneric;
- table[TTileMode][R32_SINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R32_SINT>::Store;
- table[TTileMode][R32_UINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R32_UINT>::Store;
- table[TTileMode][R32_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R32_FLOAT>::Store;
- table[TTileMode][R24_UNORM_X8_TYPELESS] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R24_UNORM_X8_TYPELESS>::StoreGeneric;
- table[TTileMode][X24_TYPELESS_G8_UINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, X24_TYPELESS_G8_UINT>::StoreGeneric;
- table[TTileMode][A32_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, A32_FLOAT>::Store;
- table[TTileMode][B8G8R8X8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B8G8R8X8_UNORM>::Store;
- table[TTileMode][B8G8R8X8_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B8G8R8X8_UNORM_SRGB>::Store;
- table[TTileMode][R8G8B8X8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8X8_UNORM>::Store;
- table[TTileMode][R8G8B8X8_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8X8_UNORM_SRGB>::Store;
-}
-
-template <SWR_TILE_MODE TTileMode, size_t NumTileModesT, size_t ArraySizeT>
-void InitStoreTilesTableColor_Half2(
- PFN_STORE_TILES(&table)[NumTileModesT][ArraySizeT])
-{
- table[TTileMode][R9G9B9E5_SHAREDEXP] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R9G9B9E5_SHAREDEXP>::StoreGeneric;
- table[TTileMode][B10G10R10X2_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10X2_UNORM>::StoreGeneric;
- table[TTileMode][R10G10B10X2_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10X2_USCALED>::StoreGeneric;
- table[TTileMode][R8G8B8A8_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8A8_SSCALED>::Store;
- table[TTileMode][R8G8B8A8_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R8G8B8A8_USCALED>::Store;
- table[TTileMode][R16G16_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R16G16_SSCALED>::Store;
- table[TTileMode][R16G16_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R16G16_USCALED>::Store;
- table[TTileMode][R32_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R32_SSCALED>::Store;
- table[TTileMode][R32_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R32_USCALED>::Store;
- table[TTileMode][B5G6R5_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B5G6R5_UNORM>::Store;
- table[TTileMode][B5G6R5_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B5G6R5_UNORM_SRGB>::StoreGeneric;
- table[TTileMode][B5G5R5A1_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B5G5R5A1_UNORM>::StoreGeneric;
- table[TTileMode][B5G5R5A1_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B5G5R5A1_UNORM_SRGB>::StoreGeneric;
- table[TTileMode][B4G4R4A4_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B4G4R4A4_UNORM>::StoreGeneric;
- table[TTileMode][B4G4R4A4_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B4G4R4A4_UNORM_SRGB>::StoreGeneric;
- table[TTileMode][R8G8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R8G8_UNORM>::Store;
- table[TTileMode][R8G8_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R8G8_SNORM>::Store;
- table[TTileMode][R8G8_SINT] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R8G8_SINT>::Store;
- table[TTileMode][R8G8_UINT] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R8G8_UINT>::Store;
- table[TTileMode][R16_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R16_UNORM>::Store;
- table[TTileMode][R16_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R16_SNORM>::Store;
- table[TTileMode][R16_SINT] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R16_SINT>::Store;
- table[TTileMode][R16_UINT] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R16_UINT>::Store;
- table[TTileMode][R16_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R16_FLOAT>::Store;
- table[TTileMode][A16_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, A16_UNORM>::Store;
- table[TTileMode][A16_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, A16_FLOAT>::Store;
- table[TTileMode][B5G5R5X1_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B5G5R5X1_UNORM>::StoreGeneric;
- table[TTileMode][B5G5R5X1_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, B5G5R5X1_UNORM_SRGB>::StoreGeneric;
- table[TTileMode][R8G8_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R8G8_SSCALED>::Store;
- table[TTileMode][R8G8_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R8G8_USCALED>::Store;
- table[TTileMode][R16_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R16_SSCALED>::Store;
- table[TTileMode][R16_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, R16_USCALED>::Store;
- table[TTileMode][A1B5G5R5_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, A1B5G5R5_UNORM>::StoreGeneric;
- table[TTileMode][A4B4G4R4_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32G32B32A32_FLOAT, A4B4G4R4_UNORM>::StoreGeneric;
- table[TTileMode][R8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 8>, R32G32B32A32_FLOAT, R8_UNORM>::Store;
- table[TTileMode][R8_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 8>, R32G32B32A32_FLOAT, R8_SNORM>::Store;
- table[TTileMode][R8_SINT] = StoreMacroTile<TilingTraits<TTileMode, 8>, R32G32B32A32_FLOAT, R8_SINT>::Store;
- table[TTileMode][R8_UINT] = StoreMacroTile<TilingTraits<TTileMode, 8>, R32G32B32A32_FLOAT, R8_UINT>::Store;
- table[TTileMode][A8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 8>, R32G32B32A32_FLOAT, A8_UNORM>::Store;
- table[TTileMode][R8_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 8>, R32G32B32A32_FLOAT, R8_SSCALED>::Store;
- table[TTileMode][R8_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 8>, R32G32B32A32_FLOAT, R8_USCALED>::Store;
- table[TTileMode][R8G8B8_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 24>, R32G32B32A32_FLOAT, R8G8B8_UNORM>::Store;
- table[TTileMode][R8G8B8_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 24>, R32G32B32A32_FLOAT, R8G8B8_SNORM>::Store;
- table[TTileMode][R8G8B8_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 24>, R32G32B32A32_FLOAT, R8G8B8_SSCALED>::Store;
- table[TTileMode][R8G8B8_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 24>, R32G32B32A32_FLOAT, R8G8B8_USCALED>::Store;
- table[TTileMode][R16G16B16_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 48>, R32G32B32A32_FLOAT, R16G16B16_FLOAT>::Store;
- table[TTileMode][R16G16B16_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 48>, R32G32B32A32_FLOAT, R16G16B16_UNORM>::Store;
- table[TTileMode][R16G16B16_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 48>, R32G32B32A32_FLOAT, R16G16B16_SNORM>::Store;
- table[TTileMode][R16G16B16_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 48>, R32G32B32A32_FLOAT, R16G16B16_SSCALED>::Store;
- table[TTileMode][R16G16B16_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 48>, R32G32B32A32_FLOAT, R16G16B16_USCALED>::Store;
- table[TTileMode][R8G8B8_UNORM_SRGB] = StoreMacroTile<TilingTraits<TTileMode, 24>, R32G32B32A32_FLOAT, R8G8B8_UNORM_SRGB>::Store;
- table[TTileMode][R16G16B16_UINT] = StoreMacroTile<TilingTraits<TTileMode, 48>, R32G32B32A32_FLOAT, R16G16B16_UINT>::Store;
- table[TTileMode][R16G16B16_SINT] = StoreMacroTile<TilingTraits<TTileMode, 48>, R32G32B32A32_FLOAT, R16G16B16_SINT>::Store;
- table[TTileMode][R10G10B10A2_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10A2_SNORM>::StoreGeneric;
- table[TTileMode][R10G10B10A2_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10A2_USCALED>::StoreGeneric;
- table[TTileMode][R10G10B10A2_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10A2_SSCALED>::StoreGeneric;
- table[TTileMode][R10G10B10A2_SINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, R10G10B10A2_SINT>::StoreGeneric;
- table[TTileMode][B10G10R10A2_SNORM] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10A2_SNORM>::StoreGeneric;
- table[TTileMode][B10G10R10A2_USCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10A2_USCALED>::StoreGeneric;
- table[TTileMode][B10G10R10A2_SSCALED] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10A2_SSCALED>::StoreGeneric;
- table[TTileMode][B10G10R10A2_UINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10A2_UINT>::StoreGeneric;
- table[TTileMode][B10G10R10A2_SINT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32G32B32A32_FLOAT, B10G10R10A2_SINT>::StoreGeneric;
- table[TTileMode][R8G8B8_UINT] = StoreMacroTile<TilingTraits<TTileMode, 24>, R32G32B32A32_FLOAT, R8G8B8_UINT>::Store;
- table[TTileMode][R8G8B8_SINT] = StoreMacroTile<TilingTraits<TTileMode, 24>, R32G32B32A32_FLOAT, R8G8B8_SINT>::Store;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// INIT_STORE_TILES_TABLE - Helper macro for setting up the tables.
-template <SWR_TILE_MODE TTileMode, size_t NumTileModes, size_t ArraySizeT>
-void InitStoreTilesTableDepth(
- PFN_STORE_TILES(&table)[NumTileModes][ArraySizeT])
-{
- table[TTileMode][R32_FLOAT] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32_FLOAT, R32_FLOAT>::Store;
- table[TTileMode][R32_FLOAT_X8X24_TYPELESS] = StoreMacroTile<TilingTraits<TTileMode, 64>, R32_FLOAT, R32_FLOAT_X8X24_TYPELESS>::Store;
- table[TTileMode][R24_UNORM_X8_TYPELESS] = StoreMacroTile<TilingTraits<TTileMode, 32>, R32_FLOAT, R24_UNORM_X8_TYPELESS>::Store;
- table[TTileMode][R16_UNORM] = StoreMacroTile<TilingTraits<TTileMode, 16>, R32_FLOAT, R16_UNORM>::Store;
-}
-
-template <SWR_TILE_MODE TTileMode, size_t NumTileModes, size_t ArraySizeT>
-void InitStoreTilesTableStencil(
- PFN_STORE_TILES(&table)[NumTileModes][ArraySizeT])
-{
- table[TTileMode][R8_UINT] = StoreMacroTile<TilingTraits<TTileMode, 8>, R8_UINT, R8_UINT>::Store;
-}
-
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Deswizzles and stores a full hottile to a render surface
-/// @param hPrivateContext - Handle to private DC
-/// @param srcFormat - Format for hot tile.
-/// @param renderTargetIndex - Index to destination render target
-/// @param x, y - Coordinates to raster tile.
-/// @param pSrcHotTile - Pointer to Hot Tile
-void SwrStoreHotTileToSurface(
- HANDLE hWorkerPrivateData,
- SWR_SURFACE_STATE *pDstSurface,
- BucketManager* pBucketMgr,
- SWR_FORMAT srcFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex,
- uint8_t *pSrcHotTile);
diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile_Linear.cpp b/src/gallium/drivers/swr/rasterizer/memory/StoreTile_Linear.cpp
deleted file mode 100644
index c72063f6f1d..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile_Linear.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file StoreTile_Linear.cpp
-*
-* @brief Functionality for Store.
-*
-******************************************************************************/
-#include "StoreTile.h"
-
-void InitStoreTilesTable_Linear_1()
-{
- InitStoreTilesTableColor_Half1<SWR_TILE_NONE>(sStoreTilesTableColor);
- InitStoreTilesTableDepth<SWR_TILE_NONE>(sStoreTilesTableDepth);
- InitStoreTilesTableStencil<SWR_TILE_NONE>(sStoreTilesTableStencil);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile_Linear2.cpp b/src/gallium/drivers/swr/rasterizer/memory/StoreTile_Linear2.cpp
deleted file mode 100644
index 035e685e261..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile_Linear2.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file StoreTile_Linear2.cpp
-*
-* @brief Functionality for Store.
-*
-******************************************************************************/
-#include "StoreTile.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Runs the SWR_TILE_NONE instantiation of the "Half2" color
-///        store-tile table initializer on sStoreTilesTableColor.
-/// @note Counterpart of InitStoreTilesTable_Linear_1, which handles
-///       "Half1". How formats are divided between the halves is defined in
-///       StoreTile.h, not here.
-void InitStoreTilesTable_Linear_2()
-{
- InitStoreTilesTableColor_Half2<SWR_TILE_NONE>(sStoreTilesTableColor);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileW.cpp b/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileW.cpp
deleted file mode 100644
index ee4d99d1da0..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileW.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file StoreTile_TileW.cpp
-*
-* @brief Functionality for Store.
-*
-******************************************************************************/
-#include "StoreTile.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Sets up the SWR_TILE_MODE_WMAJOR store-tile entries: runs the
-///        stencil table initializer, then installs a single color entry
-///        for R8_UINT.
-void InitStoreTilesTable_TileW()
-{
- InitStoreTilesTableStencil<SWR_TILE_MODE_WMAJOR>(sStoreTilesTableStencil);
- // special color hot tile -> 8-bit WMAJOR
- // Only R8_UINT gets a WMAJOR color store function here; the source
- // (hot tile) format template argument is R32G32B32A32_FLOAT.
- sStoreTilesTableColor[SWR_TILE_MODE_WMAJOR][R8_UINT] = StoreMacroTile<TilingTraits<SWR_TILE_MODE_WMAJOR, 8>, R32G32B32A32_FLOAT, R8_UINT>::Store;
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileX.cpp b/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileX.cpp
deleted file mode 100644
index 7f49a432e92..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileX.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file StoreTile_TileX.cpp
-*
-* @brief Functionality for Store.
-*
-******************************************************************************/
-#include "StoreTile.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Runs the SWR_TILE_MODE_XMAJOR instantiation of the "Half1" color
-///        store-tile table initializer on sStoreTilesTableColor.
-/// @note No depth or stencil initializer is invoked for XMAJOR here.
-void InitStoreTilesTable_TileX_1()
-{
- InitStoreTilesTableColor_Half1<SWR_TILE_MODE_XMAJOR>(sStoreTilesTableColor);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileX2.cpp b/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileX2.cpp
deleted file mode 100644
index 7e36ebececb..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileX2.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file StoreTile_TileX2.cpp
-*
-* @brief Functionality for Store.
-*
-******************************************************************************/
-#include "StoreTile.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Runs the SWR_TILE_MODE_XMAJOR instantiation of the "Half2" color
-///        store-tile table initializer on sStoreTilesTableColor.
-/// @note Counterpart of InitStoreTilesTable_TileX_1, which handles "Half1".
-void InitStoreTilesTable_TileX_2()
-{
- InitStoreTilesTableColor_Half2<SWR_TILE_MODE_XMAJOR>(sStoreTilesTableColor);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileY.cpp b/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileY.cpp
deleted file mode 100644
index dade03f2523..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileY.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file StoreTile_TileY.cpp
-*
-* @brief Functionality for Store.
-*
-******************************************************************************/
-#include "StoreTile.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Runs the SWR_TILE_MODE_YMAJOR instantiations of the "Half1" color
-///        and the depth store-tile table initializers on the shared tables.
-/// @note No stencil initializer is invoked for YMAJOR here.
-void InitStoreTilesTable_TileY_1()
-{
- InitStoreTilesTableColor_Half1<SWR_TILE_MODE_YMAJOR>(sStoreTilesTableColor);
- InitStoreTilesTableDepth<SWR_TILE_MODE_YMAJOR>(sStoreTilesTableDepth);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileY2.cpp b/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileY2.cpp
deleted file mode 100644
index b3ac76759fd..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile_TileY2.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file StoreTile_TileY2.cpp
-*
-* @brief Functionality for Store.
-*
-******************************************************************************/
-#include "StoreTile.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Runs the SWR_TILE_MODE_YMAJOR instantiation of the "Half2" color
-///        store-tile table initializer on sStoreTilesTableColor.
-/// @note Counterpart of InitStoreTilesTable_TileY_1, which handles "Half1"
-///       and depth.
-void InitStoreTilesTable_TileY_2()
-{
- InitStoreTilesTableColor_Half2<SWR_TILE_MODE_YMAJOR>(sStoreTilesTableColor);
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/SurfaceState.h b/src/gallium/drivers/swr/rasterizer/memory/SurfaceState.h
deleted file mode 100644
index 6b1b78eee46..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/SurfaceState.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2014-2019 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file SurfaceState.h
-*
-* @brief Common definitions for surface state
-*
-******************************************************************************/
-#pragma once
-
-#include "core/state.h"
-
-//////////////////////////////////////////////////////////////////////////
-/// SWR_SURFACE_STATE
-///
-/// Plain description of one surface: base address, type/format, dimensions,
-/// MSAA layout, pitches, LOD range, tiling mode and auxiliary surface.
-///
-/// How the tiling helpers in TilingFunctions.h use the fields:
-///  - numSamples: AdjustCoordsForMSAA handles 1, 2, 4, 8 and 16 and asserts
-///    on anything else.
-///  - qpitch: array-slice stride; multiplied by the array index in
-///    ComputeSurfaceOffset1D (before scaling by Bpp) and in
-///    ComputeSurfaceOffset2D (in rows).
-///  - halign / valign: per-mip alignment passed to ComputeLODOffset*.
-///  - xOffset / yOffset: added to the x texel / y row in
-///    ComputeSurfaceOffset2D.
-///  - lodOffsets: [0][lod] is the cached x offset and [1][lod] the cached y
-///    offset read when UseCachedOffsets is true; the second dimension (15)
-///    equals MAX_NUM_LOD.
-///  - tileMode / bInterleavedSamples: select the MSAA coordinate adjustment
-///    in AdjustCoordsForMSAA.
-///
-/// NOTE(review): the "// @llvm_enum" trailers look like markers for the
-/// gen_llvm_types.py code generator - keep them verbatim and confirm before
-/// adding comments inside the struct body.
-//////////////////////////////////////////////////////////////////////////
-struct SWR_SURFACE_STATE
-{
- gfxptr_t xpBaseAddress;
- SWR_SURFACE_TYPE type; // @llvm_enum
- SWR_FORMAT format; // @llvm_enum
- uint32_t width;
- uint32_t height;
- uint32_t depth;
- uint32_t numSamples;
- uint32_t samplePattern;
- uint32_t pitch;
- uint32_t qpitch;
- uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
- uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed
- float resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be
- // accessed by sampler
- uint32_t lod; // for render targets, the lod being rendered to
- uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces
- SWR_TILE_MODE tileMode; // @llvm_enum
- uint32_t halign;
- uint32_t valign;
- uint32_t xOffset;
- uint32_t yOffset;
-
- uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces
-
- gfxptr_t xpAuxBaseAddress; // Used for compression, append/consume counter, etc.
- SWR_AUX_MODE auxMode; // @llvm_enum
-
-
- bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
-}; \ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/memory/TilingFunctions.h b/src/gallium/drivers/swr/rasterizer/memory/TilingFunctions.h
deleted file mode 100644
index 90143718eb8..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/TilingFunctions.h
+++ /dev/null
@@ -1,697 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file TilingFunctions.h
-*
-* @brief Tiling functions.
-*
-******************************************************************************/
-#pragma once
-
-#include "core/state.h"
-#include "core/format_traits.h"
-#include "memory/tilingtraits.h"
-#include "memory/SurfaceState.h"
-
-#include <algorithm>
-
-#define MAX_NUM_LOD 15
-
-// Rounds x up to the next multiple of a. The remainder is obtained by masking
-// with ((a) - 1), so the result is only a true multiple when a is a power of
-// two. Both x and a are expanded more than once - do not pass expressions
-// with side effects.
-#define GFX_ALIGN(x, a) (((x) + ((a) - 1)) - (((x) + ((a) - 1)) & ((a) - 1))) // Alt implementation with bitwise not (~) has issue with uint32 align used with 64-bit value, since ~'ed value will remain 32-bit.
-
-//////////////////////////////////////////////////////////////////////////
-/// SimdTile SSE(2x2), AVX(4x2), or AVX-512(4x4?)
-///
-/// One SIMD-wide group of pixels stored structure-of-arrays: one row of
-/// KNOB_SIMD_WIDTH floats per component of HotTileFormat. SrcOrDstFormat
-/// decides how many components the accessors touch and, for reads, the
-/// component swizzle.
-//////////////////////////////////////////////////////////////////////////
-template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
-struct SimdTile
-{
- // SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
- float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD_WIDTH];
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Retrieve color from simd.
- /// @param index - linear index to color within simd. Not range-checked;
- ///                must be a valid index into the offset table below.
- /// @param outputColor - output color; only the first
- ///                      FormatTraits<SrcOrDstFormat>::numComps entries are
- ///                      written.
- INLINE void GetSwizzledColor(
- uint32_t index,
- float outputColor[4])
- {
- // SOA pattern for 2x2 is a subset of 4x2.
- // 0 1 4 5
- // 2 3 6 7
- // The offset converts pattern to linear
- // Note: no table is declared when SIMD_TILE_X_DIM is neither 4 nor 2.
-#if (SIMD_TILE_X_DIM == 4)
- static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
-#elif (SIMD_TILE_X_DIM == 2)
- static const uint32_t offset[] = { 0, 1, 2, 3 };
-#endif
-
- // Output component i is read from the stored component swizzle(i).
- for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
- {
- outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
- }
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Store color into simd.
- /// @param index - linear index to color within simd. Not range-checked;
- ///                must be a valid index into the offset table below.
- /// @param src - input color; the first
- ///              FormatTraits<SrcOrDstFormat>::numComps entries are read.
- INLINE void SetSwizzledColor(
- uint32_t index,
- const float src[4])
- {
- // SOA pattern for 2x2 is a subset of 4x2.
- // 0 1 4 5
- // 2 3 6 7
- // The offset converts pattern to linear
- // Note: no table is declared when SIMD_TILE_X_DIM is neither 4 nor 2.
-#if (SIMD_TILE_X_DIM == 4)
- static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
-#elif (SIMD_TILE_X_DIM == 2)
- static const uint32_t offset[] = { 0, 1, 2, 3 };
-#endif
-
- // Only loop over the components needed for destination.
- // Unlike the getter, component i is stored straight into row i (no
- // swizzle lookup).
- for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
- {
- this->color[i][offset[index]] = src[i];
- }
- }
-};
-
-// Specialization for R8_UINT hot tiles with an R8_UINT source/destination:
-// components are stored as bytes instead of floats, and the float-typed
-// accessors carry raw bits rather than numeric values.
-template<>
-struct SimdTile <R8_UINT,R8_UINT>
-{
- // SimdTile is SOA (e.g. rrrrrrrr gggggggg bbbbbbbb aaaaaaaa )
- uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD_WIDTH];
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Retrieve color from simd.
- /// @param index - linear index to color within simd. Not range-checked;
- ///                must be a valid index into the offset table below.
- /// @param outputColor - output color; each written float holds the stored
- ///                      byte's bit pattern, not its value converted to
- ///                      float.
- INLINE void GetSwizzledColor(
- uint32_t index,
- float outputColor[4])
- {
- // SOA pattern for 2x2 is a subset of 4x2.
- // 0 1 4 5
- // 2 3 6 7
- // The offset converts pattern to linear
- // Note: no table is declared when SIMD_TILE_X_DIM is neither 4 nor 2.
-#if (SIMD_TILE_X_DIM == 4)
- static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
-#elif (SIMD_TILE_X_DIM == 2)
- static const uint32_t offset[] = { 0, 1, 2, 3 };
-#endif
-
- for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
- {
- // The byte is widened to 32 bits and those bits are then
- // reinterpreted as a float through a pointer cast.
- // NOTE(review): this pun relies on the build tolerating aliasing
- // through incompatible pointer types - confirm compiler flags.
- uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
- outputColor[i] = *(float*)&src;
- }
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Store color into simd.
- /// @param index - linear index to color within simd. Not range-checked;
- ///                must be a valid index into the offset table below.
- /// @param src - input color; one byte of each float's storage is kept.
- INLINE void SetSwizzledColor(
- uint32_t index,
- const float src[4])
- {
- // SOA pattern for 2x2 is a subset of 4x2.
- // 0 1 4 5
- // 2 3 6 7
- // The offset converts pattern to linear
- // Note: no table is declared when SIMD_TILE_X_DIM is neither 4 nor 2.
-#if (SIMD_TILE_X_DIM == 4)
- static const uint32_t offset[] = { 0, 1, 4, 5, 2, 3, 6, 7 };
-#elif (SIMD_TILE_X_DIM == 2)
- static const uint32_t offset[] = { 0, 1, 2, 3 };
-#endif
-
- // Only loop over the components needed for destination.
- for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
- {
- // Copies the first byte in memory of src[i] (no float->int
- // conversion); presumably the low byte on little-endian hosts.
- this->color[i][offset[index]] = *(uint8_t*)&src[i];
- }
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// SimdTile 8x2 for AVX-512
-///
-/// 16-wide counterpart of SimdTile: one row of KNOB_SIMD16_WIDTH floats per
-/// component of HotTileFormat, accessed through a 16-entry offset table.
-//////////////////////////////////////////////////////////////////////////
-
-template<SWR_FORMAT HotTileFormat, SWR_FORMAT SrcOrDstFormat>
-struct SimdTile_16
-{
- // SimdTile is SOA (e.g. rrrrrrrrrrrrrrrr gggggggggggggggg bbbbbbbbbbbbbbbb aaaaaaaaaaaaaaaa )
- float color[FormatTraits<HotTileFormat>::numComps][KNOB_SIMD16_WIDTH];
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Retrieve color from simd.
- /// @param index - linear index to color within simd. Not range-checked;
- ///                must be less than KNOB_SIMD16_WIDTH.
- /// @param outputColor - output color; only the first
- ///                      FormatTraits<SrcOrDstFormat>::numComps entries are
- ///                      written.
- INLINE void GetSwizzledColor(
- uint32_t index,
- float outputColor[4])
- {
- // SOA pattern for 8x2..
- // 0 1 4 5 8 9 C D
- // 2 3 6 7 A B E F
- // The offset converts pattern to linear
- static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
-
- // Output component i is read from the stored component swizzle(i).
- for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
- {
- outputColor[i] = this->color[FormatTraits<SrcOrDstFormat>::swizzle(i)][offset[index]];
- }
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Store color into simd.
- /// @param index - linear index to color within simd. Not range-checked;
- ///                must be less than KNOB_SIMD16_WIDTH.
- /// @param src - input color; the first
- ///              FormatTraits<SrcOrDstFormat>::numComps entries are read.
- INLINE void SetSwizzledColor(
- uint32_t index,
- const float src[4])
- {
- // SOA pattern for 8x2..
- // 0 1 4 5 8 9 C D
- // 2 3 6 7 A B E F
- // The offset converts pattern to linear
- static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
-
- // Component i is stored straight into row i (no swizzle lookup).
- for (uint32_t i = 0; i < FormatTraits<SrcOrDstFormat>::numComps; ++i)
- {
- this->color[i][offset[index]] = src[i];
- }
- }
-};
-
-// 16-wide specialization for R8_UINT hot tiles with an R8_UINT
-// source/destination: components are stored as bytes, and the float-typed
-// accessors carry raw bits rather than numeric values.
-template<>
-struct SimdTile_16 <R8_UINT, R8_UINT>
-{
- // SimdTile is SOA (e.g. rrrrrrrrrrrrrrrr gggggggggggggggg bbbbbbbbbbbbbbbb aaaaaaaaaaaaaaaa )
- uint8_t color[FormatTraits<R8_UINT>::numComps][KNOB_SIMD16_WIDTH];
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Retrieve color from simd.
- /// @param index - linear index to color within simd. Not range-checked;
- ///                must be less than KNOB_SIMD16_WIDTH.
- /// @param outputColor - output color; each written float holds the stored
- ///                      byte's bit pattern, not its value converted to
- ///                      float.
- INLINE void GetSwizzledColor(
- uint32_t index,
- float outputColor[4])
- {
- // SOA pattern for 8x2..
- // 0 1 4 5 8 9 C D
- // 2 3 6 7 A B E F
- // The offset converts pattern to linear
- static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
-
- for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
- {
- // The byte is widened to 32 bits and those bits are then
- // reinterpreted as a float through a pointer cast.
- // NOTE(review): this pun relies on the build tolerating aliasing
- // through incompatible pointer types - confirm compiler flags.
- uint32_t src = this->color[FormatTraits<R8_UINT>::swizzle(i)][offset[index]];
- outputColor[i] = *(float*)&src;
- }
- }
-
- //////////////////////////////////////////////////////////////////////////
- /// @brief Store color into simd.
- /// @param index - linear index to color within simd. Not range-checked;
- ///                must be less than KNOB_SIMD16_WIDTH.
- /// @param src - input color; one byte of each float's storage is kept.
- INLINE void SetSwizzledColor(
- uint32_t index,
- const float src[4])
- {
- // SOA pattern for 8x2..
- // 0 1 4 5 8 9 C D
- // 2 3 6 7 A B E F
- // The offset converts pattern to linear
- static const uint32_t offset[KNOB_SIMD16_WIDTH] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
-
- for (uint32_t i = 0; i < FormatTraits<R8_UINT>::numComps; ++i)
- {
- // Copies the first byte in memory of src[i] (no float->int
- // conversion); presumably the low byte on little-endian hosts.
- this->color[i][offset[index]] = *(uint8_t*)&src[i];
- }
- }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes lod offset for 1D surface at specified lod: the sum of
-///        the aligned widths of every level more detailed than lod.
-/// @param info - format info; isBC, isSubsampled and bcWidth are consulted.
-/// @param baseWidth - width of basemip (mip 0).
-/// @param hAlign - horizontal alignment per mip, in texels (in blocks for
-///                 BC formats; scaled by bcWidth internally).
-/// @param lod - lod index
-/// @param offset - output offset; 0 when lod is 0.
-INLINE void ComputeLODOffset1D(
-    const SWR_FORMAT_INFO& info,
-    uint32_t baseWidth,
-    uint32_t hAlign,
-    uint32_t lod,
-    uint32_t &offset)
-{
-    // The base level always starts the slice.
-    if (lod == 0)
-    {
-        offset = 0;
-        return;
-    }
-
-    // @note hAlign is already in blocks for compressed formats so upconvert
-    // so that we have the desired alignment post-divide.
-    if (info.isBC)
-    {
-        hAlign *= info.bcWidth;
-    }
-
-    // Mip 0 contributes first, followed by levels 1 .. lod-1.
-    uint32_t mipWidth = baseWidth;
-    offset = GFX_ALIGN(mipWidth, hAlign);
-    for (uint32_t remaining = lod - 1; remaining != 0; --remaining)
-    {
-        mipWidth = std::max<uint32_t>(mipWidth >> 1, 1U);
-        offset += GFX_ALIGN(mipWidth, hAlign);
-    }
-
-    // Divide the accumulated texel count by the format's block width.
-    const bool scaleDown = info.isSubsampled || info.isBC;
-    if (scaleDown)
-    {
-        offset /= info.bcWidth;
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes x lod offset for 2D surface at specified lod. Levels 0
-///        and 1 get 0; every deeper level gets the aligned width of mip 1.
-/// @param info - format info; isBC, isSubsampled and bcWidth are consulted.
-/// @param baseWidth - width of basemip (mip 0).
-/// @param hAlign - horizontal alignment per mip, in texels (in blocks for
-///                 BC formats; scaled by bcWidth internally).
-/// @param lod - lod index
-/// @param offset - output offset.
-INLINE void ComputeLODOffsetX(
-    const SWR_FORMAT_INFO& info,
-    uint32_t baseWidth,
-    uint32_t hAlign,
-    uint32_t lod,
-    uint32_t &offset)
-{
-    // Mips 0 and 1 have no horizontal displacement.
-    if (lod < 2)
-    {
-        offset = 0;
-        return;
-    }
-
-    // @note hAlign is already in blocks for compressed formats so upconvert
-    // so that we have the desired alignment post-divide.
-    if (info.isBC)
-    {
-        hAlign *= info.bcWidth;
-    }
-
-    // Aligned width of mip 1 (never narrower than one texel).
-    uint32_t mip1Width = std::max<uint32_t>(baseWidth >> 1, 1U);
-    mip1Width          = GFX_ALIGN(mip1Width, hAlign);
-
-    // Divide the texel count by the format's block width.
-    const bool scaleDown = info.isSubsampled || info.isBC;
-    if (scaleDown)
-    {
-        mip1Width /= info.bcWidth;
-    }
-
-    offset = mip1Width;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes y lod offset for 2D surface at specified lod: the sum of
-///        the aligned heights of levels 0 .. lod-1, leaving out level 1.
-/// @param info - format info; isBC and bcHeight are consulted.
-/// @param baseHeight - height of basemip (mip 0).
-/// @param vAlign - vertical alignment per mip, in rows (in blocks for BC
-///                 formats; scaled by bcHeight internally).
-/// @param lod - lod index
-/// @param offset - output offset; 0 when lod is 0.
-INLINE void ComputeLODOffsetY(
-    const SWR_FORMAT_INFO& info,
-    uint32_t baseHeight,
-    uint32_t vAlign,
-    uint32_t lod,
-    uint32_t &offset)
-{
-    offset = 0;
-    if (lod == 0)
-    {
-        return;
-    }
-
-    // @note vAlign is already in blocks for compressed formats so upconvert
-    // so that we have the desired alignment post-divide.
-    if (info.isBC)
-    {
-        vAlign *= info.bcHeight;
-    }
-
-    uint32_t levelHeight = baseHeight;
-    for (uint32_t level = 1; level <= lod; ++level)
-    {
-        // Iteration 2 adds nothing: the height held at that point (mip 1's)
-        // is not accumulated.
-        if (level != 2)
-        {
-            offset += GFX_ALIGN(levelHeight, vAlign);
-        }
-        levelHeight = std::max<uint32_t>(levelHeight >> 1, 1U);
-    }
-
-    // Divide the accumulated row count by the format's block height.
-    if (info.isBC)
-    {
-        offset /= info.bcHeight;
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes 1D surface offset
-/// @tparam UseCachedOffsets - when true the lod offset is read from
-///         pState->lodOffsets[0][lod]; otherwise it is recomputed with
-///         ComputeLODOffset1D.
-/// @param x - offset from start of array slice at given lod.
-/// @param array - array slice index
-/// @param lod - lod index
-/// @param pState - surface state
-/// @param xOffsetBytes - output offset in bytes.
-template<bool UseCachedOffsets>
-INLINE void ComputeSurfaceOffset1D(
-    uint32_t x,
-    uint32_t array,
-    uint32_t lod,
-    const SWR_SURFACE_STATE *pState,
-    uint32_t &xOffsetBytes)
-{
-    const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
-
-    uint32_t lodOffset = 0;
-    if (!UseCachedOffsets)
-    {
-        ComputeLODOffset1D(info, pState->width, pState->halign, lod, lodOffset);
-    }
-    else
-    {
-        lodOffset = pState->lodOffsets[0][lod];
-    }
-
-    // slice start + mip start + x, then scaled by the format's Bpp.
-    const uint32_t sliceStart = array * pState->qpitch;
-    xOffsetBytes = (sliceStart + lodOffset + x) * info.Bpp;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Folds the requested MSAA sample into the surface coordinates.
-///        Two layouts are handled:
-///        - YMAJOR/WMAJOR tiling with interleaved samples: x and y are
-///          rewritten so the sample bits sit inside the coordinates;
-///          arrayIndex is left alone.
-///        - otherwise, YMAJOR or untiled (SWR_TILE_NONE): x and y are left
-///          alone and arrayIndex becomes
-///          (arrayIndex << log2(numSamples)) | sampleNum.
-///        Any other combination leaves all three unchanged.
-/// @param pState - surface state
-/// @param x - in/out horizontal coordinate
-/// @param y - in/out vertical coordinate
-/// @param arrayIndex - in/out array slice index
-/// @param sampleNum - requested sample
-/// @note pdep_u32/pext_u32 are defined elsewhere; presumably bit
-///       deposit/extract in the style of BMI2 PDEP/PEXT - confirm.
-INLINE void AdjustCoordsForMSAA(const SWR_SURFACE_STATE *pState, uint32_t& x, uint32_t& y, uint32_t& arrayIndex, uint32_t sampleNum)
-{
- /// @todo: might want to templatize adjusting for sample slices when we support tileYS/tileYF.
- if((pState->tileMode == SWR_TILE_MODE_YMAJOR ||
- pState->tileMode == SWR_TILE_MODE_WMAJOR) &&
- pState->bInterleavedSamples)
- {
- // Interleaved path. In every case below the coordinate masks have
- // zero bits exactly where the sample bits are OR'ed in afterwards
- // (bit 1 for masks ending in ...D, bits 1-2 for masks ending in ...9).
- uint32_t newX, newY, newSampleX, newSampleY;
- switch(pState->numSamples)
- {
- case 1:
- newX = x;
- newY = y;
- newSampleX = newSampleY = 0;
- break;
- case 2:
- {
- // Only x carries a sample bit; y passes through.
- assert(pState->type == SURFACE_2D);
- static const uint32_t xMask = 0xFFFFFFFD;
- static const uint32_t sampleMaskX = 0x1;
- newX = pdep_u32(x, xMask);
- newY = y;
- newSampleX = pext_u32(sampleNum, sampleMaskX);
- newSampleY = 0;
- }
- break;
- case 4:
- {
- // Sample bit 0 goes to x, sample bit 1 goes to y.
- assert(pState->type == SURFACE_2D);
- static const uint32_t mask = 0xFFFFFFFD;
- static const uint32_t sampleMaskX = 0x1;
- static const uint32_t sampleMaskY = 0x2;
- newX = pdep_u32(x, mask);
- newY = pdep_u32(y, mask);
- newSampleX = pext_u32(sampleNum, sampleMaskX);
- newSampleY = pext_u32(sampleNum, sampleMaskY);
- }
- break;
- case 8:
- {
- // Sample bits 0 and 2 go to x, sample bit 1 goes to y.
- assert(pState->type == SURFACE_2D);
- static const uint32_t xMask = 0xFFFFFFF9;
- static const uint32_t yMask = 0xFFFFFFFD;
- static const uint32_t sampleMaskX = 0x5;
- static const uint32_t sampleMaskY = 0x2;
- newX = pdep_u32(x, xMask);
- newY = pdep_u32(y, yMask);
- newSampleX = pext_u32(sampleNum, sampleMaskX);
- newSampleY = pext_u32(sampleNum, sampleMaskY);
- }
- break;
- case 16:
- {
- // Sample bits 0 and 2 go to x, sample bits 1 and 3 go to y.
- assert(pState->type == SURFACE_2D);
- static const uint32_t mask = 0xFFFFFFF9;
- static const uint32_t sampleMaskX = 0x5;
- static const uint32_t sampleMaskY = 0xA;
- newX = pdep_u32(x, mask);
- newY = pdep_u32(y, mask);
- newSampleX = pext_u32(sampleNum, sampleMaskX);
- newSampleY = pext_u32(sampleNum, sampleMaskY);
- }
- break;
- default:
- // With asserts compiled out, an unsupported count collapses the
- // coordinates to (0, 0).
- assert(0 && "Unsupported sample count");
- newX = newY = 0;
- newSampleX = newSampleY = 0;
- break;
- }
- // Sample bits are placed starting at bit 1 of each coordinate.
- x = newX | (newSampleX << 1);
- y = newY | (newSampleY << 1);
- }
- else if(pState->tileMode == SWR_TILE_MODE_YMAJOR ||
- pState->tileMode == SWR_TILE_NONE)
- {
- // Non-interleaved path: the sample selects a slice. sampleShift is
- // log2(numSamples).
- uint32_t sampleShift;
- switch(pState->numSamples)
- {
- case 1:
- assert(sampleNum == 0);
- sampleShift = 0;
- break;
- case 2:
- assert(pState->type == SURFACE_2D);
- sampleShift = 1;
- break;
- case 4:
- assert(pState->type == SURFACE_2D);
- sampleShift = 2;
- break;
- case 8:
- assert(pState->type == SURFACE_2D);
- sampleShift = 3;
- break;
- case 16:
- assert(pState->type == SURFACE_2D);
- sampleShift = 4;
- break;
- default:
- // With asserts compiled out, an unsupported count falls back to a
- // shift of 0, i.e. arrayIndex | sampleNum.
- assert(0 && "Unsupported sample count");
- sampleShift = 0;
- break;
- }
- arrayIndex = (arrayIndex << sampleShift) | sampleNum;
- }
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes 2D surface offset
-/// @tparam UseCachedOffsets - when true the lod offsets are read from
-///         pState->lodOffsets; otherwise they are recomputed with
-///         ComputeLODOffsetX / ComputeLODOffsetY.
-/// @param x - horizontal offset from start of array slice and lod.
-/// @param y - vertical offset from start of array slice and lod.
-/// @param array - array slice index
-/// @param sampleNum - MSAA sample, folded into x/y/array by
-///                    AdjustCoordsForMSAA before the offsets are formed.
-/// @param lod - lod index
-/// @param pState - surface state
-/// @param xOffsetBytes - output x offset in bytes.
-/// @param yOffsetRows - output y offset in rows.
-template<bool UseCachedOffsets>
-INLINE void ComputeSurfaceOffset2D(uint32_t x, uint32_t y, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows)
-{
-    const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
-
-    uint32_t lodOffsetX = 0;
-    uint32_t lodOffsetY = 0;
-    if (!UseCachedOffsets)
-    {
-        ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
-        ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
-    }
-    else
-    {
-        lodOffsetX = pState->lodOffsets[0][lod];
-        lodOffsetY = pState->lodOffsets[1][lod];
-    }
-
-    // May rewrite x, y and array depending on tiling mode and sample layout.
-    AdjustCoordsForMSAA(pState, x, y, array, sampleNum);
-
-    // Horizontal: texel position scaled by the format's Bpp.
-    xOffsetBytes = (x + lodOffsetX + pState->xOffset) * info.Bpp;
-
-    // Vertical: slice start + mip start + row + surface y offset.
-    const uint32_t sliceStartRow = array * pState->qpitch;
-    yOffsetRows = sliceStartRow + lodOffsetY + y + pState->yOffset;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes 3D surface offset
-/// @tparam UseCachedOffsets - when true the lod offsets are read from
-///         pState->lodOffsets; otherwise they are recomputed with
-///         ComputeLODOffsetX / ComputeLODOffsetY.
-/// @param x - horizontal offset from start of array slice and lod.
-/// @param y - vertical offset from start of array slice and lod.
-/// @param z - depth offset from start of array slice and lod.
-/// @param lod - lod index
-/// @param pState - surface state
-/// @param xOffsetBytes - output x offset in bytes.
-/// @param yOffsetRows - output y offset in rows.
-/// @param zOffsetSlices - output z offset in slices (z passed through).
-template<bool UseCachedOffsets>
-INLINE void ComputeSurfaceOffset3D(uint32_t x, uint32_t y, uint32_t z, uint32_t lod, const SWR_SURFACE_STATE *pState, uint32_t &xOffsetBytes, uint32_t &yOffsetRows, uint32_t &zOffsetSlices)
-{
-    const SWR_FORMAT_INFO &info = GetFormatInfo(pState->format);
-
-    uint32_t lodOffsetX = 0;
-    uint32_t lodOffsetY = 0;
-    if (!UseCachedOffsets)
-    {
-        ComputeLODOffsetX(info, pState->width, pState->halign, lod, lodOffsetX);
-        ComputeLODOffsetY(info, pState->height, pState->valign, lod, lodOffsetY);
-    }
-    else
-    {
-        lodOffsetX = pState->lodOffsets[0][lod];
-        lodOffsetY = pState->lodOffsets[1][lod];
-    }
-
-    // Unlike the 2D variant, no MSAA adjustment and no surface x/y offset
-    // is applied here.
-    xOffsetBytes  = (x + lodOffsetX) * info.Bpp;
-    yOffsetRows   = lodOffsetY + y;
-    zOffsetSlices = z;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Swizzles the linear x,y offsets depending on surface tiling mode
-/// and returns final surface address
-/// @param xOffsetBytes - x offset from base of surface in bytes
-/// @param yOffsetRows - y offset from base of surface in rows
-/// @param pState - pointer to the surface state
-template<typename TTraits>
-INLINE uint32_t ComputeTileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
-{
- return ComputeOffset2D<TTraits>(pState->pitch, xOffsetBytes, yOffsetRows);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Swizzles the linear x,y offsets depending on surface tiling mode
-/// and returns final surface address
-/// @param xOffsetBytes - x offset from base of surface in bytes
-/// @param yOffsetRows - y offset from base of surface in rows
-/// @param pState - pointer to the surface state
-template<typename TTraits>
-INLINE uint32_t ComputeTileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
-{
- return ComputeOffset3D<TTraits>(pState->qpitch, pState->pitch, xOffsetBytes, yOffsetRows, zOffsetSlices);
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Swizzles the linear x,y offsets depending on surface tiling mode
-/// and returns final surface address
-/// @param xOffsetBytes - x offset from base of surface in bytes
-/// @param yOffsetRows - y offset from base of surface in rows
-/// @param pState - pointer to the surface state
-INLINE
-uint32_t TileSwizzle2D(uint32_t xOffsetBytes, uint32_t yOffsetRows, const SWR_SURFACE_STATE *pState)
-{
- switch (pState->tileMode)
- {
- case SWR_TILE_NONE: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, pState);
- case SWR_TILE_SWRZ: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, pState);
- case SWR_TILE_MODE_XMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_XMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
- case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, pState);
- case SWR_TILE_MODE_WMAJOR: return ComputeTileSwizzle2D<TilingTraits<SWR_TILE_MODE_WMAJOR, 8> >(xOffsetBytes, yOffsetRows, pState);
- default: SWR_INVALID("Unsupported tiling mode");
- }
- return 0;
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Swizzles the linear x,y,z offsets depending on surface tiling mode
-/// and returns final surface address
-/// @param xOffsetBytes - x offset from base of surface in bytes
-/// @param yOffsetRows - y offset from base of surface in rows
-/// @param zOffsetSlices - z offset from base of surface in slices
-/// @param pState - pointer to the surface state
-INLINE
-uint32_t TileSwizzle3D(uint32_t xOffsetBytes, uint32_t yOffsetRows, uint32_t zOffsetSlices, const SWR_SURFACE_STATE *pState)
-{
- switch (pState->tileMode)
- {
- case SWR_TILE_NONE: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_NONE, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
- case SWR_TILE_SWRZ: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_SWRZ, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
- case SWR_TILE_MODE_YMAJOR: return ComputeTileSwizzle3D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(xOffsetBytes, yOffsetRows, zOffsetSlices, pState);
- default: SWR_INVALID("Unsupported tiling mode");
- }
- return 0;
-}
-
-template<bool UseCachedOffsets>
-INLINE
-uint32_t ComputeSurfaceOffset(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
-{
- uint32_t offsetX = 0, offsetY = 0, offsetZ = 0;
- switch (pState->type)
- {
- case SURFACE_BUFFER:
- case SURFACE_STRUCTURED_BUFFER:
- offsetX = x * pState->pitch;
- return offsetX;
- break;
- case SURFACE_1D:
- ComputeSurfaceOffset1D<UseCachedOffsets>(x, array, lod, pState, offsetX);
- return TileSwizzle2D(offsetX, 0, pState);
- break;
- case SURFACE_2D:
- ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
- return TileSwizzle2D(offsetX, offsetY, pState);
- case SURFACE_3D:
- ComputeSurfaceOffset3D<UseCachedOffsets>(x, y, z, lod, pState, offsetX, offsetY, offsetZ);
- return TileSwizzle3D(offsetX, offsetY, offsetZ, pState);
- break;
- case SURFACE_CUBE:
- ComputeSurfaceOffset2D<UseCachedOffsets>(x, y, array, sampleNum, lod, pState, offsetX, offsetY);
- return TileSwizzle2D(offsetX, offsetY, pState);
- break;
- default: SWR_INVALID("Unsupported format");
- }
-
- return 0;
-}
-
-typedef void*(*PFN_COMPUTESURFADDR)(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, const SWR_SURFACE_STATE*);
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes surface address at the given location and lod
-/// @param x - x location in pixels
-/// @param y - y location in rows
-/// @param z - z location for 3D surfaces
-/// @param array - array slice for 1D and 2D surfaces
-/// @param lod - level of detail
-/// @param pState - pointer to the surface state
-template<bool UseCachedOffsets, bool IsRead>
-INLINE
-void* ComputeSurfaceAddress(uint32_t x, uint32_t y, uint32_t z, uint32_t array, uint32_t sampleNum, uint32_t lod, const SWR_SURFACE_STATE *pState)
-{
- return (void*)(pState->xpBaseAddress + ComputeSurfaceOffset<UseCachedOffsets>(x, y, z, array, sampleNum, lod, pState));
-}
diff --git a/src/gallium/drivers/swr/rasterizer/memory/tilingtraits.h b/src/gallium/drivers/swr/rasterizer/memory/tilingtraits.h
deleted file mode 100644
index c2a87d85dd1..00000000000
--- a/src/gallium/drivers/swr/rasterizer/memory/tilingtraits.h
+++ /dev/null
@@ -1,207 +0,0 @@
-/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file tilingtraits.h
-*
-* @brief Tiling traits.
-*
-******************************************************************************/
-#pragma once
-
-#include "core/state.h"
-#include "common/intrin.h"
-
-template<SWR_TILE_MODE mode, int>
-struct TilingTraits
-{
- static const SWR_TILE_MODE TileMode{ mode };
- static UINT GetCu() { SWR_NOT_IMPL; return 0; }
- static UINT GetCv() { SWR_NOT_IMPL; return 0; }
- static UINT GetCr() { SWR_NOT_IMPL; return 0; }
- static UINT GetTileIDShift() { SWR_NOT_IMPL; return 0; }
-
- /// @todo correct pdep shifts for all rastertile dims. Unused for now
- static UINT GetPdepX() { SWR_NOT_IMPL; return 0x37; }
- static UINT GetPdepY() { SWR_NOT_IMPL; return 0xC8; }
-};
-
-template<int X> struct TilingTraits <SWR_TILE_NONE, X>
-{
- static const SWR_TILE_MODE TileMode{ SWR_TILE_NONE };
- static UINT GetCu() { return 0; }
- static UINT GetCv() { return 0; }
- static UINT GetCr() { return 0; }
- static UINT GetTileIDShift() { return 0; }
- static UINT GetPdepX() { return 0x00; }
- static UINT GetPdepY() { return 0x00; }
-};
-
-template<> struct TilingTraits <SWR_TILE_SWRZ, 8>
-{
- static const SWR_TILE_MODE TileMode{ SWR_TILE_SWRZ };
- static UINT GetCu() { return KNOB_TILE_X_DIM_SHIFT; }
- static UINT GetCv() { return KNOB_TILE_Y_DIM_SHIFT; }
- static UINT GetCr() { return 0; }
- static UINT GetTileIDShift() { return KNOB_TILE_X_DIM_SHIFT + KNOB_TILE_Y_DIM_SHIFT; }
-
- /// @todo correct pdep shifts for all rastertile dims. Unused for now
- static UINT GetPdepX() { SWR_NOT_IMPL; return 0x00; }
- static UINT GetPdepY() { SWR_NOT_IMPL; return 0x00; }
-};
-
-template<> struct TilingTraits <SWR_TILE_SWRZ, 32>
-{
- static const SWR_TILE_MODE TileMode{ SWR_TILE_SWRZ };
- static UINT GetCu() { return KNOB_TILE_X_DIM_SHIFT + 2; }
- static UINT GetCv() { return KNOB_TILE_Y_DIM_SHIFT; }
- static UINT GetCr() { return 0; }
- static UINT GetTileIDShift() { return KNOB_TILE_X_DIM_SHIFT + KNOB_TILE_Y_DIM_SHIFT + 2; }
-
- static UINT GetPdepX() { return 0x37; }
- static UINT GetPdepY() { return 0xC8; }
-};
-
-template<> struct TilingTraits <SWR_TILE_SWRZ, 128>
-{
- static const SWR_TILE_MODE TileMode{ SWR_TILE_SWRZ };
- static UINT GetCu() { return KNOB_TILE_X_DIM_SHIFT + 4; }
- static UINT GetCv() { return KNOB_TILE_Y_DIM_SHIFT; }
- static UINT GetCr() { return 0; }
- static UINT GetTileIDShift() { return KNOB_TILE_X_DIM_SHIFT + KNOB_TILE_Y_DIM_SHIFT + 4; }
-
- /// @todo correct pdep shifts for all rastertile dims. Unused for now
- static UINT GetPdepX() { SWR_NOT_IMPL; return 0x37; }
- static UINT GetPdepY() { SWR_NOT_IMPL; return 0xC8; }
-};
-
-// y-major tiling layout unaffected by element size
-template<int X> struct TilingTraits <SWR_TILE_MODE_YMAJOR, X>
-{
- static const SWR_TILE_MODE TileMode{ SWR_TILE_MODE_YMAJOR };
- static UINT GetCu() { return 7; }
- static UINT GetCv() { return 5; }
- static UINT GetCr() { return 0; }
- static UINT GetTileIDShift() { return 12; }
-
- static UINT GetPdepX() { return 0xe0f; }
- static UINT GetPdepY() { return 0x1f0; }
-};
-
-// x-major tiling layout unaffected by element size
-template<int X> struct TilingTraits <SWR_TILE_MODE_XMAJOR, X>
-{
- static const SWR_TILE_MODE TileMode{ SWR_TILE_MODE_XMAJOR };
- static UINT GetCu() { return 9; }
- static UINT GetCv() { return 3; }
- static UINT GetCr() { return 0; }
- static UINT GetTileIDShift() { return 12; }
-
- static UINT GetPdepX() { return 0x1ff; }
- static UINT GetPdepY() { return 0xe00; }
-};
-
-template<int X> struct TilingTraits <SWR_TILE_MODE_WMAJOR, X>
-{
- static const SWR_TILE_MODE TileMode{ SWR_TILE_MODE_WMAJOR };
- static UINT GetCu() { return 6; }
- static UINT GetCv() { return 6; }
- static UINT GetCr() { return 0; }
- static UINT GetTileIDShift() { return 12; }
-
- static UINT GetPdepX() { return 0xe15; }
- static UINT GetPdepY() { return 0x1ea; }
-};
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes the tileID for 2D tiled surfaces
-/// @param pitch - surface pitch in bytes
-/// @param tileX - x offset in tiles
-/// @param tileY - y offset in tiles
-template<typename TTraits>
-INLINE UINT ComputeTileOffset2D(UINT pitch, UINT tileX, UINT tileY)
-{
- UINT tileID = tileY * (pitch >> TTraits::GetCu()) + tileX;
- return tileID << TTraits::GetTileIDShift();
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes the tileID for 3D tiled surfaces
-/// @param qpitch - surface qpitch in rows
-/// @param pitch - surface pitch in bytes
-/// @param tileX - x offset in tiles
-/// @param tileY - y offset in tiles
-/// @param tileZ - y offset in tiles
-template<typename TTraits>
-INLINE UINT ComputeTileOffset3D(UINT qpitch, UINT pitch, UINT tileX, UINT tileY, UINT tileZ)
-{
- UINT tileID = (tileZ * (qpitch >> TTraits::GetCv()) + tileY) * (pitch >> TTraits::GetCu()) + tileX;
- return tileID << TTraits::GetTileIDShift();
-}
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes the byte offset for 2D tiled surfaces
-/// @param pitch - surface pitch in bytes
-/// @param x - x offset in bytes
-/// @param y - y offset in rows
-template<typename TTraits>
-INLINE UINT ComputeOffset2D(UINT pitch, UINT x, UINT y)
-{
- UINT tileID = ComputeTileOffset2D<TTraits>(pitch, x >> TTraits::GetCu(), y >> TTraits::GetCv());
- UINT xSwizzle = pdep_u32(x, TTraits::GetPdepX());
- UINT ySwizzle = pdep_u32(y, TTraits::GetPdepY());
- return (tileID | xSwizzle | ySwizzle);
-}
-
-#if KNOB_ARCH <= KNOB_ARCH_AVX
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes the byte offset for 2D tiled surfaces. Specialization
-/// for tile-y surfaces that uses bit twiddling instead of pdep emulation.
-/// @param pitch - surface pitch in bytes
-/// @param x - x offset in bytes
-/// @param y - y offset in rows
-template<>
-INLINE UINT ComputeOffset2D<TilingTraits<SWR_TILE_MODE_YMAJOR, 32> >(UINT pitch, UINT x, UINT y)
-{
- typedef TilingTraits<SWR_TILE_MODE_YMAJOR, 32> TTraits;
-
- UINT tileID = ComputeTileOffset2D<TTraits>(pitch, x >> TTraits::GetCu(), y >> TTraits::GetCv());
- UINT xSwizzle = ((x << 5) & 0xe00) | (x & 0xf);
- UINT ySwizzle = (y << 4) & 0x1f0;
- return (tileID | xSwizzle | ySwizzle);
-}
-#endif
-
-//////////////////////////////////////////////////////////////////////////
-/// @brief Computes the byte offset for 3D tiled surfaces
-/// @param qpitch - depth pitch in rows
-/// @param pitch - surface pitch in bytes
-/// @param x - x offset in bytes
-/// @param y - y offset in rows
-/// @param z - y offset in slices
-template<typename TTraits>
-INLINE UINT ComputeOffset3D(UINT qpitch, UINT pitch, UINT x, UINT y, UINT z)
-{
- UINT tileID = ComputeTileOffset3D<TTraits>(qpitch, pitch, x >> TTraits::GetCu(), y >> TTraits::GetCv(), z >> TTraits::GetCr());
- UINT xSwizzle = pdep_u32(x, TTraits::GetPdepX());
- UINT ySwizzle = pdep_u32(y, TTraits::GetPdepY());
- return (tileID | xSwizzle | ySwizzle);
-}
diff --git a/src/gallium/drivers/swr/swr_clear.cpp b/src/gallium/drivers/swr/swr_clear.cpp
deleted file mode 100644
index d579cbdde9f..00000000000
--- a/src/gallium/drivers/swr/swr_clear.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#include "swr_context.h"
-#include "swr_query.h"
-
-static void
-swr_clear(struct pipe_context *pipe,
- unsigned buffers,
- const struct pipe_scissor_state *scissor_state,
- const union pipe_color_union *color,
- double depth,
- unsigned stencil)
-{
- struct swr_context *ctx = swr_context(pipe);
- struct pipe_framebuffer_state *fb = &ctx->framebuffer;
-
- UINT clearMask = 0;
- unsigned layers = 0;
-
- if (!swr_check_render_cond(pipe))
- return;
-
- swr_update_derived(pipe);
-
- if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
- for (unsigned i = 0; i < fb->nr_cbufs; ++i)
- if (fb->cbufs[i] && (buffers & (PIPE_CLEAR_COLOR0 << i))) {
- clearMask |= (SWR_ATTACHMENT_COLOR0_BIT << i);
- layers = std::max(layers, fb->cbufs[i]->u.tex.last_layer -
- fb->cbufs[i]->u.tex.first_layer + 1u);
- }
- }
-
- if (buffers & PIPE_CLEAR_DEPTH && fb->zsbuf) {
- clearMask |= SWR_ATTACHMENT_DEPTH_BIT;
- layers = std::max(layers, fb->zsbuf->u.tex.last_layer -
- fb->zsbuf->u.tex.first_layer + 1u);
- }
-
- if (buffers & PIPE_CLEAR_STENCIL && fb->zsbuf) {
- clearMask |= SWR_ATTACHMENT_STENCIL_BIT;
- layers = std::max(layers, fb->zsbuf->u.tex.last_layer -
- fb->zsbuf->u.tex.first_layer + 1u);
- }
-
-#if 0 // XXX HACK, override clear color alpha. On ubuntu, clears are
- // transparent.
- ((union pipe_color_union *)color)->f[3] = 1.0; /* cast off your const'd-ness */
-#endif
-
- /*
- * Always clear full surface. When GL_SCISSOR_TEST is enabled
- * glClear is handled by state tracker and there is no need to do this here
- */
- SWR_RECT clear_rect = {0, 0, (int32_t)fb->width, (int32_t)fb->height};
-
- for (unsigned i = 0; i < layers; ++i) {
- swr_update_draw_context(ctx);
- ctx->api.pfnSwrClearRenderTarget(ctx->swrContext, clearMask, i,
- color->f, depth, stencil,
- clear_rect);
-
- // Mask out the attachments that are out of layers.
- if (fb->zsbuf &&
- (fb->zsbuf->u.tex.last_layer <= fb->zsbuf->u.tex.first_layer + i))
- clearMask &= ~(SWR_ATTACHMENT_DEPTH_BIT | SWR_ATTACHMENT_STENCIL_BIT);
- for (unsigned c = 0; c < fb->nr_cbufs; ++c) {
- const struct pipe_surface *sf = fb->cbufs[c];
- if (sf && (sf->u.tex.last_layer <= sf->u.tex.first_layer + i))
- clearMask &= ~(SWR_ATTACHMENT_COLOR0_BIT << c);
- }
- }
-}
-
-void
-swr_clear_init(struct pipe_context *pipe)
-{
- pipe->clear = swr_clear;
-}
diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp
deleted file mode 100644
index 08637dba1d5..00000000000
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ /dev/null
@@ -1,595 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#include "swr_context.h"
-#include "swr_memory.h"
-#include "swr_screen.h"
-#include "swr_resource.h"
-#include "swr_scratch.h"
-#include "swr_query.h"
-#include "swr_fence.h"
-
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "util/format/u_format.h"
-#include "util/u_atomic.h"
-#include "util/u_upload_mgr.h"
-#include "util/u_transfer.h"
-#include "util/u_surface.h"
-
-#include "api.h"
-#include "backend.h"
-#include "knobs.h"
-
-static struct pipe_surface *
-swr_create_surface(struct pipe_context *pipe,
- struct pipe_resource *pt,
- const struct pipe_surface *surf_tmpl)
-{
- struct pipe_surface *ps;
-
- ps = CALLOC_STRUCT(pipe_surface);
- if (ps) {
- pipe_reference_init(&ps->reference, 1);
- pipe_resource_reference(&ps->texture, pt);
- ps->context = pipe;
- ps->format = surf_tmpl->format;
- if (pt->target != PIPE_BUFFER) {
- assert(surf_tmpl->u.tex.level <= pt->last_level);
- ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level);
- ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level);
- ps->u.tex.level = surf_tmpl->u.tex.level;
- ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
- ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
- } else {
- /* setting width as number of elements should get us correct
- * renderbuffer width */
- ps->width = surf_tmpl->u.buf.last_element
- - surf_tmpl->u.buf.first_element + 1;
- ps->height = pt->height0;
- ps->u.buf.first_element = surf_tmpl->u.buf.first_element;
- ps->u.buf.last_element = surf_tmpl->u.buf.last_element;
- assert(ps->u.buf.first_element <= ps->u.buf.last_element);
- assert(ps->u.buf.last_element < ps->width);
- }
- }
- return ps;
-}
-
-static void
-swr_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surf)
-{
- assert(surf->texture);
- struct pipe_resource *resource = surf->texture;
-
- /* If the resource has been drawn to, store tiles. */
- swr_store_dirty_resource(pipe, resource, SWR_TILE_RESOLVED);
-
- pipe_resource_reference(&resource, NULL);
- FREE(surf);
-}
-
-
-static void *
-swr_transfer_map(struct pipe_context *pipe,
- struct pipe_resource *resource,
- unsigned level,
- unsigned usage,
- const struct pipe_box *box,
- struct pipe_transfer **transfer)
-{
- struct swr_screen *screen = swr_screen(pipe->screen);
- struct swr_resource *spr = swr_resource(resource);
- struct pipe_transfer *pt;
- enum pipe_format format = resource->format;
-
- assert(resource);
- assert(level <= resource->last_level);
-
- /* If mapping an attached rendertarget, store tiles to surface and set
- * postStoreTileState to SWR_TILE_INVALID so tiles get reloaded on next use
- * and nothing needs to be done at unmap. */
- swr_store_dirty_resource(pipe, resource, SWR_TILE_INVALID);
-
- if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
- /* If resource is in use, finish fence before mapping.
- * Unless requested not to block, then if not done return NULL map */
- if (usage & PIPE_MAP_DONTBLOCK) {
- if (swr_is_fence_pending(screen->flush_fence))
- return NULL;
- } else {
- if (spr->status) {
- /* But, if there's no fence pending, submit one.
- * XXX: Remove once draw timestamps are finished. */
- if (!swr_is_fence_pending(screen->flush_fence))
- swr_fence_submit(swr_context(pipe), screen->flush_fence);
-
- swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
- swr_resource_unused(resource);
- }
- }
- }
-
- pt = CALLOC_STRUCT(pipe_transfer);
- if (!pt)
- return NULL;
- pipe_resource_reference(&pt->resource, resource);
- pt->usage = (pipe_map_flags)usage;
- pt->level = level;
- pt->box = *box;
- pt->stride = spr->swr.pitch;
- pt->layer_stride = spr->swr.qpitch * spr->swr.pitch;
-
- /* if we're mapping the depth/stencil, copy in stencil for the section
- * being read in
- */
- if (usage & PIPE_MAP_READ && spr->has_depth && spr->has_stencil) {
- size_t zbase, sbase;
- for (int z = box->z; z < box->z + box->depth; z++) {
- zbase = (z * spr->swr.qpitch + box->y) * spr->swr.pitch +
- spr->mip_offsets[level];
- sbase = (z * spr->secondary.qpitch + box->y) * spr->secondary.pitch +
- spr->secondary_mip_offsets[level];
- for (int y = box->y; y < box->y + box->height; y++) {
- if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
- for (int x = box->x; x < box->x + box->width; x++)
- ((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 4 * x + 3] =
- ((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x];
- } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
- for (int x = box->x; x < box->x + box->width; x++)
- ((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 8 * x + 4] =
- ((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x];
- }
- zbase += spr->swr.pitch;
- sbase += spr->secondary.pitch;
- }
- }
- }
-
- unsigned offset = box->z * pt->layer_stride +
- util_format_get_nblocksy(format, box->y) * pt->stride +
- util_format_get_stride(format, box->x);
-
- *transfer = pt;
-
- return (void*)(spr->swr.xpBaseAddress + offset + spr->mip_offsets[level]);
-}
-
-static void
-swr_transfer_flush_region(struct pipe_context *pipe,
- struct pipe_transfer *transfer,
- const struct pipe_box *flush_box)
-{
- assert(transfer->resource);
- assert(transfer->usage & PIPE_MAP_WRITE);
-
- struct swr_resource *spr = swr_resource(transfer->resource);
- if (!spr->has_depth || !spr->has_stencil)
- return;
-
- size_t zbase, sbase;
- struct pipe_box box = *flush_box;
- box.x += transfer->box.x;
- box.y += transfer->box.y;
- box.z += transfer->box.z;
- for (int z = box.z; z < box.z + box.depth; z++) {
- zbase = (z * spr->swr.qpitch + box.y) * spr->swr.pitch +
- spr->mip_offsets[transfer->level];
- sbase = (z * spr->secondary.qpitch + box.y) * spr->secondary.pitch +
- spr->secondary_mip_offsets[transfer->level];
- for (int y = box.y; y < box.y + box.height; y++) {
- if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
- for (int x = box.x; x < box.x + box.width; x++)
- ((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x] =
- ((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 4 * x + 3];
- } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
- for (int x = box.x; x < box.x + box.width; x++)
- ((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x] =
- ((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 8 * x + 4];
- }
- zbase += spr->swr.pitch;
- sbase += spr->secondary.pitch;
- }
- }
-}
-
-static void
-swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer)
-{
- assert(transfer->resource);
-
- struct swr_resource *spr = swr_resource(transfer->resource);
- /* if we're mapping the depth/stencil, copy in stencil for the section
- * being written out
- */
- if (transfer->usage & PIPE_MAP_WRITE &&
- !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT) &&
- spr->has_depth && spr->has_stencil) {
- struct pipe_box box;
- u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height,
- transfer->box.depth, &box);
- swr_transfer_flush_region(pipe, transfer, &box);
- }
-
- pipe_resource_reference(&transfer->resource, NULL);
- FREE(transfer);
-}
-
-
-static void
-swr_resource_copy(struct pipe_context *pipe,
- struct pipe_resource *dst,
- unsigned dst_level,
- unsigned dstx,
- unsigned dsty,
- unsigned dstz,
- struct pipe_resource *src,
- unsigned src_level,
- const struct pipe_box *src_box)
-{
- struct swr_screen *screen = swr_screen(pipe->screen);
-
- /* If either the src or dst is a renderTarget, store tiles before copy */
- swr_store_dirty_resource(pipe, src, SWR_TILE_RESOLVED);
- swr_store_dirty_resource(pipe, dst, SWR_TILE_RESOLVED);
-
- swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
- swr_resource_unused(src);
- swr_resource_unused(dst);
-
- if ((dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER)
- || (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER)) {
- util_resource_copy_region(
- pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box);
- return;
- }
-
- debug_printf("unhandled swr_resource_copy\n");
-}
-
-
-static void
-swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
-{
- struct swr_context *ctx = swr_context(pipe);
- /* Make a copy of the const blit_info, so we can modify it */
- struct pipe_blit_info info = *blit_info;
-
- if (info.render_condition_enable && !swr_check_render_cond(pipe))
- return;
-
- if (info.src.resource->nr_samples > 1 && info.dst.resource->nr_samples <= 1
- && !util_format_is_depth_or_stencil(info.src.resource->format)
- && !util_format_is_pure_integer(info.src.resource->format)) {
- debug_printf("swr_blit: color resolve : %d -> %d\n",
- info.src.resource->nr_samples, info.dst.resource->nr_samples);
-
- /* Resolve is done as part of the surface store. */
- swr_store_dirty_resource(pipe, info.src.resource, SWR_TILE_RESOLVED);
-
- struct pipe_resource *src_resource = info.src.resource;
- struct pipe_resource *resolve_target =
- swr_resource(src_resource)->resolve_target;
-
- /* The resolve target becomes the new source for the blit. */
- info.src.resource = resolve_target;
- }
-
- if (util_try_blit_via_copy_region(pipe, &info, ctx->render_cond_query != NULL)) {
- return; /* done */
- }
-
- if (info.mask & PIPE_MASK_S) {
- debug_printf("swr: cannot blit stencil, skipping\n");
- info.mask &= ~PIPE_MASK_S;
- }
-
- if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
- debug_printf("swr: blit unsupported %s -> %s\n",
- util_format_short_name(info.src.resource->format),
- util_format_short_name(info.dst.resource->format));
- return;
- }
-
- if (ctx->active_queries) {
- ctx->api.pfnSwrEnableStatsFE(ctx->swrContext, FALSE);
- ctx->api.pfnSwrEnableStatsBE(ctx->swrContext, FALSE);
- }
-
- util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer);
- util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems);
- util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs);
- util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);
- util_blitter_save_tessctrl_shader(ctx->blitter, (void*)ctx->tcs);
- util_blitter_save_tesseval_shader(ctx->blitter, (void*)ctx->tes);
- util_blitter_save_so_targets(
- ctx->blitter,
- ctx->num_so_targets,
- (struct pipe_stream_output_target **)ctx->so_targets);
- util_blitter_save_rasterizer(ctx->blitter, (void *)ctx->rasterizer);
- util_blitter_save_viewport(ctx->blitter, &ctx->viewports[0]);
- util_blitter_save_scissor(ctx->blitter, &ctx->scissors[0]);
- util_blitter_save_fragment_shader(ctx->blitter, ctx->fs);
- util_blitter_save_blend(ctx->blitter, (void *)ctx->blend);
- util_blitter_save_depth_stencil_alpha(ctx->blitter,
- (void *)ctx->depth_stencil);
- util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
- util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask, 0);
- util_blitter_save_framebuffer(ctx->blitter, &ctx->framebuffer);
- util_blitter_save_fragment_sampler_states(
- ctx->blitter,
- ctx->num_samplers[PIPE_SHADER_FRAGMENT],
- (void **)ctx->samplers[PIPE_SHADER_FRAGMENT]);
- util_blitter_save_fragment_sampler_views(
- ctx->blitter,
- ctx->num_sampler_views[PIPE_SHADER_FRAGMENT],
- ctx->sampler_views[PIPE_SHADER_FRAGMENT]);
- util_blitter_save_render_condition(ctx->blitter,
- ctx->render_cond_query,
- ctx->render_cond_cond,
- ctx->render_cond_mode);
-
- util_blitter_blit(ctx->blitter, &info);
-
- if (ctx->active_queries) {
- ctx->api.pfnSwrEnableStatsFE(ctx->swrContext, TRUE);
- ctx->api.pfnSwrEnableStatsBE(ctx->swrContext, TRUE);
- }
-}
-
-
-static void
-swr_destroy(struct pipe_context *pipe)
-{
- struct swr_context *ctx = swr_context(pipe);
- struct swr_screen *screen = swr_screen(pipe->screen);
-
- if (ctx->blitter)
- util_blitter_destroy(ctx->blitter);
-
- for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
- if (ctx->framebuffer.cbufs[i]) {
- struct swr_resource *res = swr_resource(ctx->framebuffer.cbufs[i]->texture);
- /* NULL curr_pipe, so we don't have a reference to a deleted pipe */
- res->curr_pipe = NULL;
- pipe_surface_reference(&ctx->framebuffer.cbufs[i], NULL);
- }
- }
-
- if (ctx->framebuffer.zsbuf) {
- struct swr_resource *res = swr_resource(ctx->framebuffer.zsbuf->texture);
- /* NULL curr_pipe, so we don't have a reference to a deleted pipe */
- res->curr_pipe = NULL;
- pipe_surface_reference(&ctx->framebuffer.zsbuf, NULL);
- }
-
- for (unsigned i = 0; i < ARRAY_SIZE(ctx->sampler_views[0]); i++) {
- pipe_sampler_view_reference(&ctx->sampler_views[PIPE_SHADER_FRAGMENT][i], NULL);
- }
-
- for (unsigned i = 0; i < ARRAY_SIZE(ctx->sampler_views[0]); i++) {
- pipe_sampler_view_reference(&ctx->sampler_views[PIPE_SHADER_VERTEX][i], NULL);
- }
-
- if (ctx->pipe.stream_uploader)
- u_upload_destroy(ctx->pipe.stream_uploader);
-
- /* Idle core after destroying buffer resources, but before deleting
- * context. Destroying resources has potentially called StoreTiles.*/
- ctx->api.pfnSwrWaitForIdle(ctx->swrContext);
-
- if (ctx->swrContext)
- ctx->api.pfnSwrDestroyContext(ctx->swrContext);
-
- delete ctx->blendJIT;
-
- swr_destroy_scratch_buffers(ctx);
-
-
- /* Only update screen->pipe if current context is being destroyed */
- assert(screen);
- if (screen->pipe == pipe)
- screen->pipe = NULL;
-
- AlignedFree(ctx);
-}
-
-
-static void
-swr_render_condition(struct pipe_context *pipe,
- struct pipe_query *query,
- bool condition,
- enum pipe_render_cond_flag mode)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- ctx->render_cond_query = query;
- ctx->render_cond_mode = mode;
- ctx->render_cond_cond = condition;
-}
-
-
-static void
-swr_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)
-{
- // NOOP
-}
-
-static void
-swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats)
-{
- swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
-
- if (!pDC)
- return;
-
- struct swr_query_result *pqr = pDC->pStats;
-
- SWR_STATS *pSwrStats = &pqr->core;
-
- pSwrStats->DepthPassCount += pStats->DepthPassCount;
- pSwrStats->PsInvocations += pStats->PsInvocations;
- pSwrStats->CsInvocations += pStats->CsInvocations;
-}
-
-static void
-swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats)
-{
- swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
-
- if (!pDC)
- return;
-
- struct swr_query_result *pqr = pDC->pStats;
-
- SWR_STATS_FE *pSwrStats = &pqr->coreFE;
- p_atomic_add(&pSwrStats->IaVertices, pStats->IaVertices);
- p_atomic_add(&pSwrStats->IaPrimitives, pStats->IaPrimitives);
- p_atomic_add(&pSwrStats->VsInvocations, pStats->VsInvocations);
- p_atomic_add(&pSwrStats->HsInvocations, pStats->HsInvocations);
- p_atomic_add(&pSwrStats->DsInvocations, pStats->DsInvocations);
- p_atomic_add(&pSwrStats->GsInvocations, pStats->GsInvocations);
- p_atomic_add(&pSwrStats->CInvocations, pStats->CInvocations);
- p_atomic_add(&pSwrStats->CPrimitives, pStats->CPrimitives);
- p_atomic_add(&pSwrStats->GsPrimitives, pStats->GsPrimitives);
-
- for (unsigned i = 0; i < 4; i++) {
- p_atomic_add(&pSwrStats->SoPrimStorageNeeded[i],
- pStats->SoPrimStorageNeeded[i]);
- p_atomic_add(&pSwrStats->SoNumPrimsWritten[i],
- pStats->SoNumPrimsWritten[i]);
- }
-}
-
-static void
-swr_UpdateStreamOut(HANDLE hPrivateContext, uint64_t numPrims)
-{
- swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
-
- if (!pDC)
- return;
-
- if (pDC->soPrims)
- *pDC->soPrims += numPrims;
-}
-
-struct pipe_context *
-swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
-{
- struct swr_context *ctx = (struct swr_context *)
- AlignedMalloc(sizeof(struct swr_context), KNOB_SIMD_BYTES);
- memset((void*)ctx, 0, sizeof(struct swr_context));
-
- swr_screen(p_screen)->pfnSwrGetInterface(ctx->api);
- swr_screen(p_screen)->pfnSwrGetTileInterface(ctx->tileApi);
- ctx->swrDC.pAPI = &ctx->api;
- ctx->swrDC.pTileAPI = &ctx->tileApi;
-
- ctx->blendJIT =
- new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>;
-
- ctx->max_draws_in_flight = KNOB_MAX_DRAWS_IN_FLIGHT;
-
- SWR_CREATECONTEXT_INFO createInfo {0};
-
- createInfo.privateStateSize = sizeof(swr_draw_context);
- createInfo.pfnLoadTile = swr_LoadHotTile;
- createInfo.pfnStoreTile = swr_StoreHotTile;
- createInfo.pfnUpdateStats = swr_UpdateStats;
- createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE;
- createInfo.pfnUpdateStreamOut = swr_UpdateStreamOut;
- createInfo.pfnMakeGfxPtr = swr_MakeGfxPtr;
-
- SWR_THREADING_INFO threadingInfo {0};
-
- threadingInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS;
- threadingInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES;
- threadingInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE;
- threadingInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE;
- threadingInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED;
-
- // Use non-standard settings for KNL
- if (swr_screen(p_screen)->is_knl)
- {
- if (nullptr == getenv("KNOB_MAX_THREADS_PER_CORE"))
- threadingInfo.MAX_THREADS_PER_CORE = 2;
-
- if (nullptr == getenv("KNOB_MAX_DRAWS_IN_FLIGHT"))
- {
- ctx->max_draws_in_flight = 2048;
- createInfo.MAX_DRAWS_IN_FLIGHT = ctx->max_draws_in_flight;
- }
- }
-
- createInfo.pThreadInfo = &threadingInfo;
-
- ctx->swrContext = ctx->api.pfnSwrCreateContext(&createInfo);
-
- ctx->api.pfnSwrInit();
-
- if (ctx->swrContext == NULL)
- goto fail;
-
- ctx->pipe.screen = p_screen;
- ctx->pipe.destroy = swr_destroy;
- ctx->pipe.priv = priv;
- ctx->pipe.create_surface = swr_create_surface;
- ctx->pipe.surface_destroy = swr_surface_destroy;
- ctx->pipe.buffer_map = swr_transfer_map;
- ctx->pipe.buffer_unmap = swr_transfer_unmap;
- ctx->pipe.texture_map = swr_transfer_map;
- ctx->pipe.texture_unmap = swr_transfer_unmap;
- ctx->pipe.transfer_flush_region = swr_transfer_flush_region;
-
- ctx->pipe.buffer_subdata = u_default_buffer_subdata;
- ctx->pipe.texture_subdata = u_default_texture_subdata;
-
- ctx->pipe.clear_texture = util_clear_texture;
- ctx->pipe.resource_copy_region = swr_resource_copy;
- ctx->pipe.flush_resource = swr_flush_resource;
- ctx->pipe.render_condition = swr_render_condition;
-
- swr_state_init(&ctx->pipe);
- swr_clear_init(&ctx->pipe);
- swr_draw_init(&ctx->pipe);
- swr_query_init(&ctx->pipe);
-
- ctx->pipe.stream_uploader = u_upload_create_default(&ctx->pipe);
- if (!ctx->pipe.stream_uploader)
- goto fail;
- ctx->pipe.const_uploader = ctx->pipe.stream_uploader;
-
- ctx->pipe.blit = swr_blit;
- ctx->blitter = util_blitter_create(&ctx->pipe);
- if (!ctx->blitter)
- goto fail;
-
- swr_init_scratch_buffers(ctx);
-
- return &ctx->pipe;
-
-fail:
- /* Should really validate the init steps and fail gracefully */
- swr_destroy(&ctx->pipe);
- return NULL;
-}
diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h
deleted file mode 100644
index 11578764c23..00000000000
--- a/src/gallium/drivers/swr/swr_context.h
+++ /dev/null
@@ -1,236 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#ifndef SWR_CONTEXT_H
-#define SWR_CONTEXT_H
-
-#include "common/os.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_state.h"
-#include "util/u_blitter.h"
-#include "rasterizer/memory/SurfaceState.h"
-#include "rasterizer/memory/InitMemory.h"
-#include "jit_api.h"
-#include "swr_state.h"
-#include <unordered_map>
-
-#define SWR_NEW_BLEND (1 << 0)
-#define SWR_NEW_RASTERIZER (1 << 1)
-#define SWR_NEW_DEPTH_STENCIL_ALPHA (1 << 2)
-#define SWR_NEW_SAMPLER (1 << 3)
-#define SWR_NEW_SAMPLER_VIEW (1 << 4)
-#define SWR_NEW_VS (1 << 5)
-#define SWR_NEW_FS (1 << 6)
-#define SWR_NEW_GS (1 << 7)
-#define SWR_NEW_VSCONSTANTS (1 << 8)
-#define SWR_NEW_FSCONSTANTS (1 << 9)
-#define SWR_NEW_GSCONSTANTS (1 << 10)
-#define SWR_NEW_VERTEX (1 << 11)
-#define SWR_NEW_STIPPLE (1 << 12)
-#define SWR_NEW_SCISSOR (1 << 13)
-#define SWR_NEW_VIEWPORT (1 << 14)
-#define SWR_NEW_FRAMEBUFFER (1 << 15)
-#define SWR_NEW_CLIP (1 << 16)
-#define SWR_NEW_SO (1 << 17)
-#define SWR_BLOCK_CLIENT_DRAW ( 1 << 18) // Indicates client draw will block
-#define SWR_NEW_TCS (1 << 19)
-#define SWR_NEW_TES (1 << 20)
-#define SWR_NEW_TS (1 << 21)
-#define SWR_NEW_TCSCONSTANTS (1 << 22)
-#define SWR_NEW_TESCONSTANTS (1 << 23)
-
-namespace std
-{
-template <> struct hash<BLEND_COMPILE_STATE> {
- std::size_t operator()(const BLEND_COMPILE_STATE &k) const
- {
- return util_hash_crc32(&k, sizeof(k));
- }
-};
-};
-
-struct swr_jit_texture {
- uint32_t width; // same as number of elements
- uint32_t height;
- uint32_t depth; // doubles as array size
- uint32_t first_level;
- uint32_t last_level;
- const uint8_t *base_ptr;
- uint32_t num_samples;
- uint32_t sample_stride;
- uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS];
- uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS];
- uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS];
-};
-
-struct swr_jit_sampler {
- float min_lod;
- float max_lod;
- float lod_bias;
- float border_color[4];
-};
-
-struct swr_draw_context {
- const float *constantVS[PIPE_MAX_CONSTANT_BUFFERS];
- uint32_t num_constantsVS[PIPE_MAX_CONSTANT_BUFFERS];
- const float *constantFS[PIPE_MAX_CONSTANT_BUFFERS];
- uint32_t num_constantsFS[PIPE_MAX_CONSTANT_BUFFERS];
- const float *constantGS[PIPE_MAX_CONSTANT_BUFFERS];
- uint32_t num_constantsGS[PIPE_MAX_CONSTANT_BUFFERS];
- const float *constantTCS[PIPE_MAX_CONSTANT_BUFFERS];
- uint32_t num_constantsTCS[PIPE_MAX_CONSTANT_BUFFERS];
- const float *constantTES[PIPE_MAX_CONSTANT_BUFFERS];
- uint32_t num_constantsTES[PIPE_MAX_CONSTANT_BUFFERS];
-
- swr_jit_texture texturesVS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
- swr_jit_sampler samplersVS[PIPE_MAX_SAMPLERS];
- swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
- swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS];
- swr_jit_texture texturesGS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
- swr_jit_sampler samplersGS[PIPE_MAX_SAMPLERS];
- swr_jit_texture texturesTCS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
- swr_jit_sampler samplersTCS[PIPE_MAX_SAMPLERS];
- swr_jit_texture texturesTES[PIPE_MAX_SHADER_SAMPLER_VIEWS];
- swr_jit_sampler samplersTES[PIPE_MAX_SAMPLERS];
-
- float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];
-
- uint32_t polyStipple[32];
-
- SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
- struct swr_query_result *pStats; // @llvm_struct
- SWR_INTERFACE *pAPI; // @llvm_struct - Needed for the swr_memory callbacks
- SWR_TILE_INTERFACE *pTileAPI; // @llvm_struct - Needed for the swr_memory callbacks
-
- uint64_t* soPrims; //number of primitives written to StreamOut buffer
-};
-
-/* gen_llvm_types FINI */
-
-struct swr_context {
- struct pipe_context pipe; /**< base class */
-
- HANDLE swrContext;
-
- SWR_TS_STATE tsState;
-
- /** Constant state objects */
- struct swr_blend_state *blend;
- struct pipe_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
- struct pipe_depth_stencil_alpha_state *depth_stencil;
- struct pipe_rasterizer_state *rasterizer;
-
- struct swr_vertex_shader *vs;
- struct swr_fragment_shader *fs;
- struct swr_geometry_shader *gs;
- struct swr_tess_control_shader *tcs;
- struct swr_tess_evaluation_shader *tes;
- struct swr_vertex_element_state *velems;
-
- /** Other rendering state */
- struct pipe_blend_color blend_color;
- struct pipe_stencil_ref stencil_ref;
- struct pipe_clip_state clip;
- struct pipe_constant_buffer
- constants[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
- struct pipe_framebuffer_state framebuffer;
- struct swr_poly_stipple poly_stipple;
- struct pipe_scissor_state scissors[KNOB_NUM_VIEWPORTS_SCISSORS];
- SWR_RECT swr_scissors[KNOB_NUM_VIEWPORTS_SCISSORS];
- struct pipe_sampler_view *
- sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
-
- struct pipe_viewport_state viewports[KNOB_NUM_VIEWPORTS_SCISSORS];
- struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-
- struct blitter_context *blitter;
-
- /** Conditional query object and mode */
- struct pipe_query *render_cond_query;
- enum pipe_render_cond_flag render_cond_mode;
- bool render_cond_cond;
- unsigned active_queries;
-
- unsigned num_vertex_buffers;
- unsigned num_samplers[PIPE_SHADER_TYPES];
- unsigned num_sampler_views[PIPE_SHADER_TYPES];
-
- unsigned sample_mask;
-
- // streamout
- pipe_stream_output_target *so_targets[MAX_SO_STREAMS];
- uint32_t num_so_targets;
- uint64_t so_primCounter; // number of primitives written to StreamOut buffer
-
- /* Temp storage for user_buffer constants */
- struct swr_scratch_buffers *scratch;
-
- // blend jit functions
- std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC> *blendJIT;
-
- /* Derived SWR API DrawState */
- struct swr_derived_state derived;
-
- /* SWR private state - draw context */
- struct swr_draw_context swrDC;
-
- unsigned dirty; /**< Mask of SWR_NEW_x flags */
-
- SWR_INTERFACE api;
- SWR_TILE_INTERFACE tileApi;
-
- uint32_t max_draws_in_flight;
- uint8_t patch_vertices;
-};
-
-static INLINE struct swr_context *
-swr_context(struct pipe_context *pipe)
-{
- return (struct swr_context *)pipe;
-}
-
-static INLINE void
-swr_update_draw_context(struct swr_context *ctx,
- struct swr_query_result *pqr = nullptr)
-{
- swr_draw_context *pDC =
- (swr_draw_context *)ctx->api.pfnSwrGetPrivateContextState(ctx->swrContext);
- if (pqr)
- ctx->swrDC.pStats = pqr;
- memcpy(pDC, &ctx->swrDC, sizeof(swr_draw_context));
-}
-
-struct pipe_context *swr_create_context(struct pipe_screen *, void *priv, unsigned flags);
-
-void swr_state_init(struct pipe_context *pipe);
-
-void swr_clear_init(struct pipe_context *pipe);
-
-void swr_draw_init(struct pipe_context *pipe);
-
-void swr_finish(struct pipe_context *pipe);
-
-void swr_do_msaa_resolve(struct pipe_resource *src_resource,
- struct pipe_resource *dst_resource);
-#endif
diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp
deleted file mode 100644
index 4b42a8e0390..00000000000
--- a/src/gallium/drivers/swr/swr_draw.cpp
+++ /dev/null
@@ -1,399 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#include "swr_screen.h"
-#include "swr_context.h"
-#include "swr_resource.h"
-#include "swr_fence.h"
-#include "swr_query.h"
-#include "jit_api.h"
-
-#include "util/u_draw.h"
-#include "util/u_prim.h"
-
-#include <algorithm>
-#include <iostream>
-/*
- * Draw vertex arrays, with optional indexing, optional instancing.
- */
-static void
-swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
- unsigned drawid_offset,
- const struct pipe_draw_indirect_info *indirect,
- const struct pipe_draw_start_count_bias *draws,
- unsigned num_draws)
-{
- if (num_draws > 1) {
- struct pipe_draw_info tmp_info = *info;
- unsigned drawid = drawid_offset;
-
- for (unsigned i = 0; i < num_draws; i++) {
- swr_draw_vbo(pipe, &tmp_info, drawid, indirect, &draws[i], 1);
- if (tmp_info.increment_draw_id)
- drawid++;
- }
- return;
- }
-
- if (!indirect && (!draws[0].count || !info->instance_count))
- return;
-
- struct swr_context *ctx = swr_context(pipe);
-
- if (!indirect &&
- !info->primitive_restart &&
- !u_trim_pipe_prim(info->mode, (unsigned*)&draws[0].count))
- return;
-
- if (!swr_check_render_cond(pipe))
- return;
-
- if (indirect && indirect->buffer) {
- util_draw_indirect(pipe, info, indirect);
- return;
- }
-
- /* If indexed draw, force vertex validation since index buffer comes
- * from draw info. */
- if (info->index_size)
- ctx->dirty |= SWR_NEW_VERTEX;
-
- /* Update derived state, pass draw info to update function. */
- swr_update_derived(pipe, info, draws);
-
- swr_update_draw_context(ctx);
-
- struct pipe_draw_info resolved_info;
- struct pipe_draw_start_count_bias resolved_draw;
- /* DrawTransformFeedback */
- if (indirect && indirect->count_from_stream_output) {
- // trick copied from softpipe to modify const struct *info
- memcpy(&resolved_info, (void*)info, sizeof(struct pipe_draw_info));
- resolved_draw.start = draws[0].start;
- resolved_draw.count = ctx->so_primCounter * ctx->patch_vertices;
- resolved_info.max_index = resolved_draw.count - 1;
- info = &resolved_info;
- indirect = NULL;
- draws = &resolved_draw;
- }
-
- if (ctx->vs->pipe.stream_output.num_outputs) {
- if (!ctx->vs->soFunc[info->mode]) {
- STREAMOUT_COMPILE_STATE state = {0};
- struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;
-
- state.numVertsPerPrim = u_vertices_per_prim(info->mode);
-
- uint32_t offsets[MAX_SO_STREAMS] = {0};
- uint32_t num = 0;
-
- for (uint32_t i = 0; i < so->num_outputs; i++) {
- assert(so->output[i].stream == 0); // @todo
- uint32_t output_buffer = so->output[i].output_buffer;
- if (so->output[i].dst_offset != offsets[output_buffer]) {
- // hole - need to fill
- state.stream.decl[num].bufferIndex = output_buffer;
- state.stream.decl[num].hole = true;
- state.stream.decl[num].componentMask =
- (1 << (so->output[i].dst_offset - offsets[output_buffer]))
- - 1;
- num++;
- offsets[output_buffer] = so->output[i].dst_offset;
- }
-
- unsigned attrib_slot = so->output[i].register_index;
- attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);
-
- state.stream.decl[num].bufferIndex = output_buffer;
- state.stream.decl[num].attribSlot = attrib_slot;
- state.stream.decl[num].componentMask =
- ((1 << so->output[i].num_components) - 1)
- << so->output[i].start_component;
- state.stream.decl[num].hole = false;
- num++;
-
- offsets[output_buffer] += so->output[i].num_components;
- }
-
- state.stream.numDecls = num;
-
- HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;
- ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);
- debug_printf("so shader %p\n", ctx->vs->soFunc[info->mode]);
- assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");
- }
-
- ctx->api.pfnSwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);
- }
-
- struct swr_vertex_element_state *velems = ctx->velems;
- if (info->primitive_restart)
- velems->fsState.cutIndex = info->restart_index;
- else
- velems->fsState.cutIndex = 0;
- velems->fsState.bEnableCutIndex = info->primitive_restart;
- velems->fsState.bPartialVertexBuffer = (info->index_bounds_valid && info->min_index > 0);
-
- swr_jit_fetch_key key;
- swr_generate_fetch_key(key, velems);
- auto search = velems->map.find(key);
- if (search != velems->map.end()) {
- velems->fsFunc = search->second;
- } else {
- HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
- velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);
-
- debug_printf("fetch shader %p\n", velems->fsFunc);
- assert(velems->fsFunc && "Error: FetchShader = NULL");
-
- velems->map.insert(std::make_pair(key, velems->fsFunc));
- }
-
- ctx->api.pfnSwrSetFetchFunc(ctx->swrContext, velems->fsFunc);
-
- /* Set up frontend state
- * XXX setup provokingVertex & topologyProvokingVertex */
- SWR_FRONTEND_STATE feState = {0};
-
- // feState.vsVertexSize seeds the PA size that is used as an interface
- // between all the shader stages, so it has to be large enough to
- // incorporate all interfaces between stages
-
- // max of frontend shaders num_outputs
- feState.vsVertexSize = ctx->vs->info.base.num_outputs;
- if (ctx->gs) {
- feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->gs->info.base.num_outputs);
- }
- if (ctx->tcs) {
- feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->tcs->info.base.num_outputs);
- }
- if (ctx->tes) {
- feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->tes->info.base.num_outputs);
- }
-
-
- if (ctx->vs->info.base.num_outputs) {
- // gs does not adjust for position in SGV slot at input from vs
- if (!ctx->gs && !ctx->tcs && !ctx->tes)
- feState.vsVertexSize--;
- }
-
- // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
- feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;
-
- // The PA in the clipper does not handle BE vertex sizes
- // different from FE. Increase vertexsize only for the cases that needed it
-
- // primid needs a slot
- if (ctx->fs->info.base.uses_primid)
- feState.vsVertexSize++;
- // sprite coord enable
- if (ctx->rasterizer->sprite_coord_enable)
- feState.vsVertexSize++;
-
- if (ctx->rasterizer->flatshade_first) {
- feState.provokingVertex = {1, 0, 0};
- } else {
- feState.provokingVertex = {2, 1, 2};
- }
-
- enum pipe_prim_type topology;
- if (ctx->gs)
- topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
- else
- topology = info->mode;
-
- switch (topology) {
- case PIPE_PRIM_TRIANGLE_FAN:
- feState.topologyProvokingVertex = feState.provokingVertex.triFan;
- break;
- case PIPE_PRIM_TRIANGLE_STRIP:
- case PIPE_PRIM_TRIANGLES:
- feState.topologyProvokingVertex = feState.provokingVertex.triStripList;
- break;
- case PIPE_PRIM_QUAD_STRIP:
- case PIPE_PRIM_QUADS:
- if (ctx->rasterizer->flatshade_first)
- feState.topologyProvokingVertex = 0;
- else
- feState.topologyProvokingVertex = 3;
- break;
- case PIPE_PRIM_LINES:
- case PIPE_PRIM_LINE_LOOP:
- case PIPE_PRIM_LINE_STRIP:
- feState.topologyProvokingVertex = feState.provokingVertex.lineStripList;
- break;
- default:
- feState.topologyProvokingVertex = 0;
- }
-
- feState.bEnableCutIndex = info->primitive_restart;
- ctx->api.pfnSwrSetFrontendState(ctx->swrContext, &feState);
-
- if (info->index_size)
- ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext,
- swr_convert_prim_topology(info->mode, ctx->patch_vertices),
- draws[0].count,
- info->instance_count,
- draws[0].start,
- draws->index_bias,
- info->start_instance);
- else
- ctx->api.pfnSwrDrawInstanced(ctx->swrContext,
- swr_convert_prim_topology(info->mode, ctx->patch_vertices),
- draws[0].count,
- info->instance_count,
- draws[0].start,
- info->start_instance);
-
- /* On client-buffer draw, we used client buffer directly, without
- * copy. Block until draw is finished.
- * VMD is an example application that benefits from this. */
- if (ctx->dirty & SWR_BLOCK_CLIENT_DRAW) {
- struct swr_screen *screen = swr_screen(pipe->screen);
- swr_fence_submit(ctx, screen->flush_fence);
- swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
- }
-}
-
-
-static void
-swr_flush(struct pipe_context *pipe,
- struct pipe_fence_handle **fence,
- unsigned flags)
-{
- struct swr_context *ctx = swr_context(pipe);
- struct swr_screen *screen = swr_screen(pipe->screen);
-
- for (int i=0; i < ctx->framebuffer.nr_cbufs; i++) {
- struct pipe_surface *cb = ctx->framebuffer.cbufs[i];
- if (cb) {
- swr_store_dirty_resource(pipe, cb->texture, SWR_TILE_RESOLVED);
- }
- }
- if (ctx->framebuffer.zsbuf) {
- swr_store_dirty_resource(pipe, ctx->framebuffer.zsbuf->texture,
- SWR_TILE_RESOLVED);
- }
-
- if (fence)
- swr_fence_reference(pipe->screen, fence, screen->flush_fence);
-}
-
-void
-swr_finish(struct pipe_context *pipe)
-{
- struct pipe_fence_handle *fence = nullptr;
-
- swr_flush(pipe, &fence, 0);
- swr_fence_finish(pipe->screen, NULL, fence, 0);
- swr_fence_reference(pipe->screen, &fence, NULL);
-}
-
-/*
- * Invalidate tiles so they can be reloaded back when needed
- */
-void
-swr_invalidate_render_target(struct pipe_context *pipe,
- uint32_t attachment,
- uint16_t width, uint16_t height)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- /* grab the rect from the passed in arguments */
- swr_update_draw_context(ctx);
- SWR_RECT full_rect =
- {0, 0, (int32_t)width, (int32_t)height};
- ctx->api.pfnSwrInvalidateTiles(ctx->swrContext,
- 1 << attachment,
- full_rect);
-}
-
-
-/*
- * Store SWR HotTiles back to renderTarget surface.
- */
-void
-swr_store_render_target(struct pipe_context *pipe,
- uint32_t attachment,
- enum SWR_TILE_STATE post_tile_state)
-{
- struct swr_context *ctx = swr_context(pipe);
- struct swr_draw_context *pDC = &ctx->swrDC;
- struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment];
-
- /* Only proceed if there's a valid surface to store to */
- if (renderTarget->xpBaseAddress) {
- swr_update_draw_context(ctx);
- SWR_RECT full_rect =
- {0, 0,
- (int32_t)u_minify(renderTarget->width, renderTarget->lod),
- (int32_t)u_minify(renderTarget->height, renderTarget->lod)};
- ctx->api.pfnSwrStoreTiles(ctx->swrContext,
- 1 << attachment,
- post_tile_state,
- full_rect);
- }
-}
-
-void
-swr_store_dirty_resource(struct pipe_context *pipe,
- struct pipe_resource *resource,
- enum SWR_TILE_STATE post_tile_state)
-{
- /* Only store resource if it has been written to */
- if (swr_resource(resource)->status & SWR_RESOURCE_WRITE) {
- struct swr_context *ctx = swr_context(pipe);
- struct swr_screen *screen = swr_screen(pipe->screen);
- struct swr_resource *spr = swr_resource(resource);
-
- swr_draw_context *pDC = &ctx->swrDC;
- SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
- for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
- if (renderTargets[i].xpBaseAddress == spr->swr.xpBaseAddress ||
- (spr->secondary.xpBaseAddress &&
- renderTargets[i].xpBaseAddress == spr->secondary.xpBaseAddress)) {
- swr_store_render_target(pipe, i, post_tile_state);
-
- /* Mesa thinks depth/stencil are fused, so we'll never get an
- * explicit resource for stencil. So, if checking depth, then
- * also check for stencil. */
- if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) {
- swr_store_render_target(
- pipe, SWR_ATTACHMENT_STENCIL, post_tile_state);
- }
-
- /* This fence signals StoreTiles completion */
- swr_fence_submit(ctx, screen->flush_fence);
-
- break;
- }
- }
-}
-
-void
-swr_draw_init(struct pipe_context *pipe)
-{
- pipe->draw_vbo = swr_draw_vbo;
- pipe->flush = swr_flush;
-}
diff --git a/src/gallium/drivers/swr/swr_fence.cpp b/src/gallium/drivers/swr/swr_fence.cpp
deleted file mode 100644
index 4e2b2af874c..00000000000
--- a/src/gallium/drivers/swr/swr_fence.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#include "pipe/p_screen.h"
-#include "util/u_memory.h"
-#include "util/os_time.h"
-
-#include "swr_context.h"
-#include "swr_screen.h"
-#include "swr_fence.h"
-
-#ifdef __APPLE__
-#include <sched.h>
-#endif
-
-#if defined(PIPE_CC_MSVC) // portable thread yield
- #define sched_yield SwitchToThread
-#endif
-
-/*
- * Fence callback, called by back-end thread on completion of all rendering up
- * to SwrSync call.
- */
-static void
-swr_fence_cb(uint64_t userData, uint64_t userData2, uint64_t userData3)
-{
- struct swr_fence *fence = (struct swr_fence *)userData;
-
- /* Complete all work attached to the fence */
- swr_fence_do_work(fence);
-
- /* Correct value is in SwrSync data, and not the fence write field. */
- /* Contexts may not finish in order, but fence value always increases */
- if (fence->read < userData2)
- fence->read = userData2;
-}
-
-/*
- * Submit an existing fence.
- */
-void
-swr_fence_submit(struct swr_context *ctx, struct pipe_fence_handle *fh)
-{
- struct swr_fence *fence = swr_fence(fh);
-
- fence->write++;
- fence->pending = TRUE;
- ctx->api.pfnSwrSync(ctx->swrContext, swr_fence_cb, (uint64_t)fence, fence->write, 0);
-}
-
-/*
- * Create a new fence object.
- */
-struct pipe_fence_handle *
-swr_fence_create()
-{
- static int fence_id = 0;
- struct swr_fence *fence = CALLOC_STRUCT(swr_fence);
- if (!fence)
- return NULL;
-
- pipe_reference_init(&fence->reference, 1);
- fence->id = fence_id++;
- fence->work.tail = &fence->work.head;
-
- return (struct pipe_fence_handle *)fence;
-}
-
-/** Destroy a fence. Called when refcount hits zero. */
-static void
-swr_fence_destroy(struct swr_fence *fence)
-{
- /* Complete any work left if fence was not submitted */
- swr_fence_do_work(fence);
- FREE(fence);
-}
-
-/**
- * Set ptr = fence, with reference counting
- */
-void
-swr_fence_reference(struct pipe_screen *screen,
- struct pipe_fence_handle **ptr,
- struct pipe_fence_handle *f)
-{
- struct swr_fence *fence = swr_fence(f);
- struct swr_fence *old;
-
- if (likely(ptr)) {
- old = swr_fence(*ptr);
- *ptr = f;
- } else {
- old = NULL;
- }
-
- if (pipe_reference(&old->reference, &fence->reference)) {
- swr_fence_finish(screen, NULL, (struct pipe_fence_handle *) old, 0);
- swr_fence_destroy(old);
- }
-}
-
-
-/*
- * Wait for the fence to finish.
- */
-bool
-swr_fence_finish(struct pipe_screen *screen,
- struct pipe_context *ctx,
- struct pipe_fence_handle *fence_handle,
- uint64_t timeout)
-{
- while (!swr_is_fence_done(fence_handle))
- sched_yield();
-
- swr_fence(fence_handle)->pending = FALSE;
-
- return TRUE;
-}
-
-
-uint64_t
-swr_get_timestamp(struct pipe_screen *screen)
-{
- return os_time_get_nano();
-}
-
-
-void
-swr_fence_init(struct pipe_screen *p_screen)
-{
- p_screen->fence_reference = swr_fence_reference;
- p_screen->fence_finish = swr_fence_finish;
- p_screen->get_timestamp = swr_get_timestamp;
-
- /* Create persistant StoreTiles "flush" fence, used to signal completion
- * of flushing tile state back to resource texture, via StoreTiles. */
- struct swr_screen *screen = swr_screen(p_screen);
- screen->flush_fence = swr_fence_create();
-}
diff --git a/src/gallium/drivers/swr/swr_fence.h b/src/gallium/drivers/swr/swr_fence.h
deleted file mode 100644
index 2f7cd1cf9a6..00000000000
--- a/src/gallium/drivers/swr/swr_fence.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- ***************************************************************************/
-
-#ifndef SWR_FENCE_H
-#define SWR_FENCE_H
-
-#include "pipe/p_state.h"
-#include "util/u_inlines.h"
-
-#include "swr_fence_work.h"
-
-struct pipe_screen;
-
-struct swr_fence {
- struct pipe_reference reference;
-
- uint64_t read;
- uint64_t write;
-
- unsigned pending;
-
- unsigned id; /* Just for reference */
-
- struct {
- uint32_t count;
- struct swr_fence_work head;
- struct swr_fence_work *tail;
- } work;
-};
-
-
-static inline struct swr_fence *
-swr_fence(struct pipe_fence_handle *fence)
-{
- return (struct swr_fence *)fence;
-}
-
-
-static INLINE bool
-swr_is_fence_done(struct pipe_fence_handle *fence_handle)
-{
- struct swr_fence *fence = swr_fence(fence_handle);
- return (fence->read == fence->write);
-}
-
-static INLINE bool
-swr_is_fence_pending(struct pipe_fence_handle *fence_handle)
-{
- return swr_fence(fence_handle)->pending;
-}
-
-
-void swr_fence_init(struct pipe_screen *screen);
-
-struct pipe_fence_handle *swr_fence_create();
-
-void swr_fence_reference(struct pipe_screen *screen,
- struct pipe_fence_handle **ptr,
- struct pipe_fence_handle *f);
-
-bool swr_fence_finish(struct pipe_screen *screen,
- struct pipe_context *ctx,
- struct pipe_fence_handle *fence_handle,
- uint64_t timeout);
-
-void
-swr_fence_submit(struct swr_context *ctx, struct pipe_fence_handle *fence);
-
-uint64_t swr_get_timestamp(struct pipe_screen *screen);
-
-#endif
diff --git a/src/gallium/drivers/swr/swr_fence_work.cpp b/src/gallium/drivers/swr/swr_fence_work.cpp
deleted file mode 100644
index 6df55666a36..00000000000
--- a/src/gallium/drivers/swr/swr_fence_work.cpp
+++ /dev/null
@@ -1,213 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#include "swr_context.h"
-#include "swr_fence.h"
-
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
-
-/*
- * Called by swr_fence_cb to complete the work queue
- */
-void
-swr_fence_do_work(struct swr_fence *fence)
-{
- struct swr_fence_work *work, *tmp;
-
- if (fence->work.head.next) {
- work = fence->work.head.next;
- /* Immediately clear the head so any new work gets added to a new work
- * queue */
- p_atomic_set(&fence->work.head.next, 0);
- p_atomic_set(&fence->work.tail, &fence->work.head);
- p_atomic_set(&fence->work.count, 0);
-
- do {
- tmp = work->next;
- work->callback(work);
- FREE(work);
- work = tmp;
- } while(work);
- }
-}
-
-
-/*
- * Called by one of the specialized work routines below
- */
-static inline void
-swr_add_fence_work(struct pipe_fence_handle *fh,
- struct swr_fence_work *work)
-{
- /* If no fence, just do the work now */
- if (!fh) {
- work->callback(work);
- FREE(work);
- return;
- }
-
- struct swr_fence *fence = swr_fence(fh);
- p_atomic_set(&fence->work.tail->next, work);
- p_atomic_set(&fence->work.tail, work);
- p_atomic_inc(&fence->work.count);
-}
-
-
-/*
- * Generic free/free_aligned, and delete vs/fs
- */
-template<bool aligned_free>
-static void
-swr_free_cb(struct swr_fence_work *work)
-{
- if (aligned_free)
- AlignedFree(work->free.data);
- else
- FREE(work->free.data);
-}
-
-static void
-swr_delete_vs_cb(struct swr_fence_work *work)
-{
- delete work->free.swr_vs;
-}
-
-static void
-swr_delete_fs_cb(struct swr_fence_work *work)
-{
- delete work->free.swr_fs;
-}
-
-static void
-swr_delete_gs_cb(struct swr_fence_work *work)
-{
- delete work->free.swr_gs;
-}
-
-static void
-swr_delete_tcs_cb(struct swr_fence_work *work)
-{
- delete work->free.swr_tcs;
-}
-
-static void
-swr_delete_tes_cb(struct swr_fence_work *work)
-{
- delete work->free.swr_tes;
-}
-
-
-bool
-swr_fence_work_free(struct pipe_fence_handle *fence, void *data,
- bool aligned_free)
-{
- struct swr_fence_work *work = CALLOC_STRUCT(swr_fence_work);
- if (!work)
- return false;
- if (aligned_free)
- work->callback = swr_free_cb<true>;
- else
- work->callback = swr_free_cb<false>;
- work->free.data = data;
-
- swr_add_fence_work(fence, work);
-
- return true;
-}
-
-bool
-swr_fence_work_delete_vs(struct pipe_fence_handle *fence,
- struct swr_vertex_shader *swr_vs)
-{
- struct swr_fence_work *work = CALLOC_STRUCT(swr_fence_work);
- if (!work)
- return false;
- work->callback = swr_delete_vs_cb;
- work->free.swr_vs = swr_vs;
-
- swr_add_fence_work(fence, work);
-
- return true;
-}
-
-bool
-swr_fence_work_delete_fs(struct pipe_fence_handle *fence,
- struct swr_fragment_shader *swr_fs)
-{
- struct swr_fence_work *work = CALLOC_STRUCT(swr_fence_work);
- if (!work)
- return false;
- work->callback = swr_delete_fs_cb;
- work->free.swr_fs = swr_fs;
-
- swr_add_fence_work(fence, work);
-
- return true;
-}
-
-bool
-swr_fence_work_delete_gs(struct pipe_fence_handle *fence,
- struct swr_geometry_shader *swr_gs)
-{
- struct swr_fence_work *work = CALLOC_STRUCT(swr_fence_work);
- if (!work)
- return false;
- work->callback = swr_delete_gs_cb;
- work->free.swr_gs = swr_gs;
-
- swr_add_fence_work(fence, work);
-
- return true;
-}
-
-bool
-swr_fence_work_delete_tcs(struct pipe_fence_handle *fence,
- struct swr_tess_control_shader *swr_tcs)
-{
- struct swr_fence_work *work = CALLOC_STRUCT(swr_fence_work);
- if (!work)
- return false;
- work->callback = swr_delete_tcs_cb;
- work->free.swr_tcs = swr_tcs;
-
- swr_add_fence_work(fence, work);
-
- return true;
-}
-
-
-bool
-swr_fence_work_delete_tes(struct pipe_fence_handle *fence,
- struct swr_tess_evaluation_shader *swr_tes)
-{
- struct swr_fence_work *work = CALLOC_STRUCT(swr_fence_work);
- if (!work)
- return false;
- work->callback = swr_delete_tes_cb;
- work->free.swr_tes = swr_tes;
-
- swr_add_fence_work(fence, work);
-
- return true;
-} \ No newline at end of file
diff --git a/src/gallium/drivers/swr/swr_fence_work.h b/src/gallium/drivers/swr/swr_fence_work.h
deleted file mode 100644
index ab411599ca5..00000000000
--- a/src/gallium/drivers/swr/swr_fence_work.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- ***************************************************************************/
-
-#ifndef SWR_FENCE_WORK_H
-#define SWR_FENCE_WORK_H
-
-typedef void(*SWR_WORK_CALLBACK_FUNC)(struct swr_fence_work *work);
-
-struct swr_fence_work {
- SWR_WORK_CALLBACK_FUNC callback;
-
- union {
- void *data;
- struct swr_vertex_shader *swr_vs;
- struct swr_fragment_shader *swr_fs;
- struct swr_geometry_shader *swr_gs;
- struct swr_tess_control_shader *swr_tcs;
- struct swr_tess_evaluation_shader *swr_tes;
- } free;
-
- struct swr_fence_work *next;
-};
-
-void swr_fence_do_work(struct swr_fence *fence);
-
-bool swr_fence_work_free(struct pipe_fence_handle *fence, void *data,
- bool aligned_free = false);
-bool swr_fence_work_delete_vs(struct pipe_fence_handle *fence,
- struct swr_vertex_shader *swr_vs);
-bool swr_fence_work_delete_fs(struct pipe_fence_handle *fence,
- struct swr_fragment_shader *swr_vs);
-bool swr_fence_work_delete_gs(struct pipe_fence_handle *fence,
- struct swr_geometry_shader *swr_gs);
-bool swr_fence_work_delete_tcs(struct pipe_fence_handle *fence,
- struct swr_tess_control_shader *swr_tcs);
-bool swr_fence_work_delete_tes(struct pipe_fence_handle *fence,
- struct swr_tess_evaluation_shader *swr_tes);
-#endif
diff --git a/src/gallium/drivers/swr/swr_loader.cpp b/src/gallium/drivers/swr/swr_loader.cpp
deleted file mode 100644
index 1fb14e636d7..00000000000
--- a/src/gallium/drivers/swr/swr_loader.cpp
+++ /dev/null
@@ -1,160 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#include "memory/InitMemory.h"
-#include "util/u_cpu_detect.h"
-#include "util/u_dl.h"
-#include "swr_public.h"
-#include "swr_screen.h"
-
-#include <stdio.h>
-
-// Helper function to resolve the backend filename based on architecture
-static bool
-swr_initialize_screen_interface(struct swr_screen *screen, const char arch[])
-{
-#ifdef HAVE_SWR_BUILTIN
- screen->pLibrary = NULL;
- screen->pfnSwrGetInterface = SwrGetInterface;
- screen->pfnSwrGetTileInterface = SwrGetTileIterface;
- InitTilesTable();
- swr_print_info("(using: builtin).\n");
-#else
- char filename[256] = { 0 };
- sprintf(filename, "%sswr%s%s", UTIL_DL_PREFIX, arch, UTIL_DL_EXT);
-
- screen->pLibrary = util_dl_open(filename);
- if (!screen->pLibrary) {
- fprintf(stderr, "(skipping: %s).\n", util_dl_error());
- return false;
- }
-
- util_dl_proc pApiProc = util_dl_get_proc_address(screen->pLibrary,
- "SwrGetInterface");
- util_dl_proc pTileApiProc = util_dl_get_proc_address(screen->pLibrary,
- "SwrGetTileIterface");
- util_dl_proc pInitFunc = util_dl_get_proc_address(screen->pLibrary,
- "InitTilesTable");
- if (!pApiProc || !pInitFunc || !pTileApiProc) {
- fprintf(stderr, "(skipping: %s).\n", util_dl_error());
- util_dl_close(screen->pLibrary);
- screen->pLibrary = NULL;
- return false;
- }
-
- screen->pfnSwrGetInterface = (PFNSwrGetInterface)pApiProc;
- screen->pfnSwrGetTileInterface = (PFNSwrGetTileInterface)pTileApiProc;
-
- SWR_ASSERT(screen->pfnSwrGetInterface != nullptr);
- SWR_ASSERT(screen->pfnSwrGetTileInterface != nullptr);
- SWR_ASSERT(pInitFunc != nullptr);
-
- pInitFunc();
-
- swr_print_info("(using: %s).\n", filename);
-#endif
-
- return true;
-}
-
-
-struct pipe_screen *
-swr_create_screen(struct sw_winsys *winsys)
-{
- struct pipe_screen *p_screen = swr_create_screen_internal(winsys);
- if (!p_screen) {
- return NULL;
- }
-
- struct swr_screen *screen = swr_screen(p_screen);
- screen->is_knl = false;
-
- util_cpu_detect();
-
- if (util_get_cpu_caps()->has_avx512f && util_get_cpu_caps()->has_avx512er) {
- swr_print_info("SWR detected KNL instruction support ");
-#ifndef HAVE_SWR_KNL
- swr_print_info("(skipping: not built).\n");
-#else
- if (swr_initialize_screen_interface(screen, "KNL")) {
- screen->is_knl = true;
- return p_screen;
- }
-#endif
- }
-
- if (util_get_cpu_caps()->has_avx512f && util_get_cpu_caps()->has_avx512bw) {
- swr_print_info("SWR detected SKX instruction support ");
-#ifndef HAVE_SWR_SKX
- swr_print_info("(skipping not built).\n");
-#else
- if (swr_initialize_screen_interface(screen, "SKX"))
- return p_screen;
-#endif
- }
-
- if (util_get_cpu_caps()->has_avx2) {
- swr_print_info("SWR detected AVX2 instruction support ");
-#ifndef HAVE_SWR_AVX2
- swr_print_info("(skipping not built).\n");
-#else
- if (swr_initialize_screen_interface(screen, "AVX2"))
- return p_screen;
-#endif
- }
-
- if (util_get_cpu_caps()->has_avx) {
- swr_print_info("SWR detected AVX instruction support ");
-#ifndef HAVE_SWR_AVX
- swr_print_info("(skipping not built).\n");
-#else
- if (swr_initialize_screen_interface(screen, "AVX"))
- return p_screen;
-#endif
- }
-
- fprintf(stderr, "SWR could not initialize a supported CPU architecture.\n");
- swr_destroy_screen_internal(&screen);
-
- return NULL;
-}
-
-
-#ifdef _WIN32
-// swap function called from libl_gdi.c
-
-void
-swr_gdi_swap(struct pipe_screen *screen,
- struct pipe_context *ctx,
- struct pipe_resource *res,
- void *hDC)
-{
- screen->flush_frontbuffer(screen,
- ctx,
- res,
- 0, 0,
- hDC,
- NULL);
-}
-
-#endif /* _WIN32 */
diff --git a/src/gallium/drivers/swr/swr_memory.h b/src/gallium/drivers/swr/swr_memory.h
deleted file mode 100644
index bf6eaa34758..00000000000
--- a/src/gallium/drivers/swr/swr_memory.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#pragma once
-#include "rasterizer/core/context.h"
-INLINE void
-swr_LoadHotTile(HANDLE hDC,
- HANDLE hWorkerPrivateData,
- SWR_FORMAT dstFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- UINT x, UINT y,
- uint32_t renderTargetArrayIndex, uint8_t* pDstHotTile)
-{
- DRAW_CONTEXT *pDC = (DRAW_CONTEXT*)hDC;
- swr_draw_context *pSDC = (swr_draw_context*)GetPrivateState(pDC);
- SWR_SURFACE_STATE *pSrcSurface = &pSDC->renderTargets[renderTargetIndex];
-
- pSDC->pTileAPI->pfnSwrLoadHotTile(hWorkerPrivateData, pSrcSurface, pDC->pContext->pBucketMgr, dstFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pDstHotTile);
-}
-
-INLINE void
-swr_StoreHotTile(HANDLE hDC,
- HANDLE hWorkerPrivateData,
- SWR_FORMAT srcFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- UINT x, UINT y,
- uint32_t renderTargetArrayIndex, uint8_t* pSrcHotTile)
-{
- DRAW_CONTEXT *pDC = (DRAW_CONTEXT*)hDC;
- swr_draw_context *pSDC = (swr_draw_context*)GetPrivateState(pDC);
- SWR_SURFACE_STATE *pDstSurface = &pSDC->renderTargets[renderTargetIndex];
-
- pSDC->pTileAPI->pfnSwrStoreHotTileToSurface(hWorkerPrivateData, pDstSurface, pDC->pContext->pBucketMgr, srcFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pSrcHotTile);
-}
-
-INLINE gfxptr_t
-swr_MakeGfxPtr(HANDLE hPrivateContext, void* sysAddr)
-{
- // Fulfill an unused internal interface
- return (gfxptr_t)sysAddr;
-}
diff --git a/src/gallium/drivers/swr/swr_public.h b/src/gallium/drivers/swr/swr_public.h
deleted file mode 100644
index 2a7d2984cb3..00000000000
--- a/src/gallium/drivers/swr/swr_public.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#ifndef SWR_PUBLIC_H
-#define SWR_PUBLIC_H
-
-struct pipe_screen;
-struct pipe_context;
-struct sw_displaytarget;
-struct sw_winsys;
-struct swr_screen;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// driver entry point
-struct pipe_screen *swr_create_screen(struct sw_winsys *winsys);
-
-// arch-specific dll entry point
-struct pipe_screen *swr_create_screen_internal(struct sw_winsys *winsys);
-
-// cleanup for failed screen creation
-void swr_destroy_screen_internal(struct swr_screen **screen);
-
-#ifdef _WIN32
-void swr_gdi_swap(struct pipe_screen *screen,
- struct pipe_context *ctx,
- struct pipe_resource *res,
- void *hDC);
-#endif /* _WIN32 */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp
deleted file mode 100644
index 005b64fb090..00000000000
--- a/src/gallium/drivers/swr/swr_query.cpp
+++ /dev/null
@@ -1,272 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-#include "util/os_time.h"
-#include "swr_context.h"
-#include "swr_fence.h"
-#include "swr_query.h"
-#include "swr_screen.h"
-#include "swr_state.h"
-#include "common/os.h"
-
-static struct swr_query *
-swr_query(struct pipe_query *p)
-{
- return (struct swr_query *)p;
-}
-
-static struct pipe_query *
-swr_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
-{
- struct swr_query *pq;
-
- assert(type < PIPE_QUERY_TYPES);
- assert(index < MAX_SO_STREAMS);
-
- pq = (struct swr_query *) AlignedMalloc(sizeof(struct swr_query), 64);
-
- if (pq) {
- memset(pq, 0, sizeof(*pq));
- pq->type = type;
- pq->index = index;
- }
-
- return (struct pipe_query *)pq;
-}
-
-
-static void
-swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
-{
- struct swr_query *pq = swr_query(q);
-
- if (pq->fence) {
- if (swr_is_fence_pending(pq->fence))
- swr_fence_finish(pipe->screen, NULL, pq->fence, 0);
- swr_fence_reference(pipe->screen, &pq->fence, NULL);
- }
-
- AlignedFree(pq);
-}
-
-
-static bool
-swr_get_query_result(struct pipe_context *pipe,
- struct pipe_query *q,
- bool wait,
- union pipe_query_result *result)
-{
- struct swr_query *pq = swr_query(q);
- unsigned index = pq->index;
-
- if (pq->fence) {
- if (!wait && !swr_is_fence_done(pq->fence))
- return false;
-
- swr_fence_finish(pipe->screen, NULL, pq->fence, 0);
- swr_fence_reference(pipe->screen, &pq->fence, NULL);
- }
-
- /* All values are reset to 0 at swr_begin_query, except starting timestamp.
- * Counters become simply end values. */
- switch (pq->type) {
- /* Booleans */
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
- result->b = pq->result.core.DepthPassCount != 0;
- break;
- case PIPE_QUERY_GPU_FINISHED:
- result->b = true;
- break;
- /* Counters */
- case PIPE_QUERY_OCCLUSION_COUNTER:
- result->u64 = pq->result.core.DepthPassCount;
- break;
- case PIPE_QUERY_TIMESTAMP:
- case PIPE_QUERY_TIME_ELAPSED:
- result->u64 = pq->result.timestamp_end - pq->result.timestamp_start;
- break;
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- result->u64 = pq->result.coreFE.IaPrimitives;
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- result->u64 = pq->result.coreFE.SoNumPrimsWritten[index];
- break;
- /* Structures */
- case PIPE_QUERY_SO_STATISTICS: {
- struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
- so_stats->num_primitives_written =
- pq->result.coreFE.SoNumPrimsWritten[index];
- so_stats->primitives_storage_needed =
- pq->result.coreFE.SoPrimStorageNeeded[index];
- } break;
- case PIPE_QUERY_TIMESTAMP_DISJOINT:
- /* os_get_time_nano returns nanoseconds */
- result->timestamp_disjoint.frequency = UINT64_C(1000000000);
- result->timestamp_disjoint.disjoint = FALSE;
- break;
- case PIPE_QUERY_PIPELINE_STATISTICS: {
- struct pipe_query_data_pipeline_statistics *p_stats =
- &result->pipeline_statistics;
- p_stats->ia_vertices = pq->result.coreFE.IaVertices;
- p_stats->ia_primitives = pq->result.coreFE.IaPrimitives;
- p_stats->vs_invocations = pq->result.coreFE.VsInvocations;
- p_stats->gs_invocations = pq->result.coreFE.GsInvocations;
- p_stats->gs_primitives = pq->result.coreFE.GsPrimitives;
- p_stats->c_invocations = pq->result.coreFE.CPrimitives;
- p_stats->c_primitives = pq->result.coreFE.CPrimitives;
- p_stats->ps_invocations = pq->result.core.PsInvocations;
- p_stats->hs_invocations = pq->result.coreFE.HsInvocations;
- p_stats->ds_invocations = pq->result.coreFE.DsInvocations;
- p_stats->cs_invocations = pq->result.core.CsInvocations;
- } break;
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
- uint64_t num_primitives_written =
- pq->result.coreFE.SoNumPrimsWritten[index];
- uint64_t primitives_storage_needed =
- pq->result.coreFE.SoPrimStorageNeeded[index];
- result->b = num_primitives_written > primitives_storage_needed;
- }
- break;
- default:
- assert(0 && "Unsupported query");
- break;
- }
-
- return true;
-}
-
-static bool
-swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
-{
- struct swr_context *ctx = swr_context(pipe);
- struct swr_query *pq = swr_query(q);
-
- /* Initialize Results */
- memset(&pq->result, 0, sizeof(pq->result));
- switch (pq->type) {
- case PIPE_QUERY_GPU_FINISHED:
- case PIPE_QUERY_TIMESTAMP:
- /* nothing to do, but don't want the default */
- break;
- case PIPE_QUERY_TIME_ELAPSED:
- pq->result.timestamp_start = swr_get_timestamp(pipe->screen);
- break;
- default:
- /* Core counters required. Update draw context with location to
- * store results. */
- swr_update_draw_context(ctx, &pq->result);
-
- /* Only change stat collection if there are no active queries */
- if (ctx->active_queries == 0) {
- ctx->api.pfnSwrEnableStatsFE(ctx->swrContext, TRUE);
- ctx->api.pfnSwrEnableStatsBE(ctx->swrContext, TRUE);
- }
- ctx->active_queries++;
- break;
- }
-
-
- return true;
-}
-
-static bool
-swr_end_query(struct pipe_context *pipe, struct pipe_query *q)
-{
- struct swr_context *ctx = swr_context(pipe);
- struct swr_query *pq = swr_query(q);
-
- switch (pq->type) {
- case PIPE_QUERY_GPU_FINISHED:
- /* nothing to do, but don't want the default */
- break;
- case PIPE_QUERY_TIMESTAMP:
- case PIPE_QUERY_TIME_ELAPSED:
- pq->result.timestamp_end = swr_get_timestamp(pipe->screen);
- break;
- default:
- /* Stats are updated asynchronously, a fence is used to signal
- * completion. */
- if (!pq->fence) {
- struct swr_screen *screen = swr_screen(pipe->screen);
- swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);
- }
- swr_fence_submit(ctx, pq->fence);
-
- /* Only change stat collection if there are no active queries */
- ctx->active_queries--;
- if (ctx->active_queries == 0) {
- ctx->api.pfnSwrEnableStatsFE(ctx->swrContext, FALSE);
- ctx->api.pfnSwrEnableStatsBE(ctx->swrContext, FALSE);
- }
-
- break;
- }
-
- return true;
-}
-
-
-bool
-swr_check_render_cond(struct pipe_context *pipe)
-{
- struct swr_context *ctx = swr_context(pipe);
- bool b, wait;
- uint64_t result;
-
- if (!ctx->render_cond_query)
- return true; /* no query predicate, draw normally */
-
- wait = (ctx->render_cond_mode == PIPE_RENDER_COND_WAIT
- || ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT);
-
- b = pipe->get_query_result(
- pipe, ctx->render_cond_query, wait, (union pipe_query_result *)&result);
- if (b)
- return ((!result) == ctx->render_cond_cond);
- else
- return true;
-}
-
-
-static void
-swr_set_active_query_state(struct pipe_context *pipe, bool enable)
-{
-}
-
-void
-swr_query_init(struct pipe_context *pipe)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- pipe->create_query = swr_create_query;
- pipe->destroy_query = swr_destroy_query;
- pipe->begin_query = swr_begin_query;
- pipe->end_query = swr_end_query;
- pipe->get_query_result = swr_get_query_result;
- pipe->set_active_query_state = swr_set_active_query_state;
-
- ctx->active_queries = 0;
-}
diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h
deleted file mode 100644
index d838dc859e2..00000000000
--- a/src/gallium/drivers/swr/swr_query.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#ifndef SWR_QUERY_H
-#define SWR_QUERY_H
-
-
-#include <limits.h>
-
-struct swr_query_result {
- SWR_STATS core;
- SWR_STATS_FE coreFE;
- uint64_t timestamp_start;
- uint64_t timestamp_end;
-};
-
-OSALIGNLINE(struct) swr_query {
- unsigned type; /* PIPE_QUERY_* */
- unsigned index;
-
- struct swr_query_result result;
- struct pipe_fence_handle *fence;
-};
-
-extern void swr_query_init(struct pipe_context *pipe);
-
-extern bool swr_check_render_cond(struct pipe_context *pipe);
-#endif
diff --git a/src/gallium/drivers/swr/swr_resource.h b/src/gallium/drivers/swr/swr_resource.h
deleted file mode 100644
index 2228dff7488..00000000000
--- a/src/gallium/drivers/swr/swr_resource.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#ifndef SWR_RESOURCE_H
-#define SWR_RESOURCE_H
-
-#include "memory/SurfaceState.h"
-#include "pipe/p_state.h"
-#include "api.h"
-
-struct sw_displaytarget;
-
-enum swr_resource_status {
- SWR_RESOURCE_UNUSED = 0x0,
- SWR_RESOURCE_READ = 0x1,
- SWR_RESOURCE_WRITE = 0x2,
-};
-
-struct swr_resource {
- struct pipe_resource base;
-
- bool has_depth;
- bool has_stencil;
-
- SWR_SURFACE_STATE swr;
- SWR_SURFACE_STATE secondary; /* for faking depth/stencil merged formats */
-
- struct sw_displaytarget *display_target;
-
- /* If resource is multisample, then this points to a alternate resource
- * containing the resolved multisample surface, otherwise null */
- struct pipe_resource *resolve_target;
-
- size_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS];
- size_t secondary_mip_offsets[PIPE_MAX_TEXTURE_LEVELS];
-
- enum swr_resource_status status;
-
- /* last pipe that used (validated) this resource */
- struct pipe_context *curr_pipe;
-};
-
-
-static INLINE struct swr_resource *
-swr_resource(struct pipe_resource *resource)
-{
- return (struct swr_resource *)resource;
-}
-
-static INLINE bool
-swr_resource_is_texture(const struct pipe_resource *resource)
-{
- switch (resource->target) {
- case PIPE_BUFFER:
- return false;
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_2D_ARRAY:
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_3D:
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return true;
- default:
- assert(0);
- return false;
- }
-}
-
-
-static INLINE uint8_t *
-swr_resource_data(struct pipe_resource *resource)
-{
- struct swr_resource *swr_r = swr_resource(resource);
-
- assert(!swr_resource_is_texture(resource));
-
- return (uint8_t*)(swr_r->swr.xpBaseAddress);
-}
-
-
-void swr_invalidate_render_target(struct pipe_context *pipe,
- uint32_t attachment,
- uint16_t width, uint16_t height);
-
-void swr_store_render_target(struct pipe_context *pipe,
- uint32_t attachment,
- enum SWR_TILE_STATE post_tile_state);
-
-void swr_store_dirty_resource(struct pipe_context *pipe,
- struct pipe_resource *resource,
- enum SWR_TILE_STATE post_tile_state);
-
-void swr_update_resource_status(struct pipe_context *,
- const struct pipe_draw_info *);
-
-/*
- * Functions to indicate a resource's in-use status.
- */
-static INLINE enum
-swr_resource_status & operator|=(enum swr_resource_status & a,
- enum swr_resource_status b) {
- return (enum swr_resource_status &)((int&)a |= (int)b);
-}
-
-static INLINE void
-swr_resource_read(struct pipe_resource *resource)
-{
- swr_resource(resource)->status |= SWR_RESOURCE_READ;
-}
-
-static INLINE void
-swr_resource_write(struct pipe_resource *resource)
-{
- swr_resource(resource)->status |= SWR_RESOURCE_WRITE;
-}
-
-static INLINE void
-swr_resource_unused(struct pipe_resource *resource)
-{
- swr_resource(resource)->status = SWR_RESOURCE_UNUSED;
-}
-
-#endif
diff --git a/src/gallium/drivers/swr/swr_scratch.cpp b/src/gallium/drivers/swr/swr_scratch.cpp
deleted file mode 100644
index 66f18365cc7..00000000000
--- a/src/gallium/drivers/swr/swr_scratch.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#include "util/u_memory.h"
-#include "swr_context.h"
-#include "swr_screen.h"
-#include "swr_scratch.h"
-#include "swr_fence.h"
-#include "swr_fence_work.h"
-#include "api.h"
-
-void *
-swr_copy_to_scratch_space(struct swr_context *ctx,
- struct swr_scratch_space *space,
- const void *user_buffer,
- unsigned int size)
-{
- void *ptr;
- assert(space);
- assert(size);
-
- /* Allocate enough so that MAX_DRAWS_IN_FLIGHT sets fit. */
- uint32_t max_size_in_flight = size * ctx->max_draws_in_flight;
-
- /* Need to grow space */
- if (max_size_in_flight > space->current_size) {
- space->current_size = max_size_in_flight;
-
- if (space->base) {
- /* defer delete, use aligned-free, fence finish enforces the defer
- * delete will be on the *next* fence */
- struct swr_screen *screen = swr_screen(ctx->pipe.screen);
- swr_fence_finish(ctx->pipe.screen, NULL, screen->flush_fence, 0);
- swr_fence_work_free(screen->flush_fence, space->base, true);
- space->base = NULL;
- }
-
- if (!space->base) {
- space->base = (uint8_t *)AlignedMalloc(space->current_size,
- sizeof(void *));
- space->head = (void *)space->base;
- }
- }
-
- /* Wrap */
- if (((uint8_t *)space->head + size)
- >= ((uint8_t *)space->base + space->current_size)) {
- space->head = space->base;
- }
-
- ptr = space->head;
- space->head = (uint8_t *)space->head + size;
-
- /* Copy user_buffer to scratch */
- if (user_buffer)
- memcpy(ptr, user_buffer, size);
-
- return ptr;
-}
-
-
-void
-swr_init_scratch_buffers(struct swr_context *ctx)
-{
- struct swr_scratch_buffers *scratch;
-
- scratch = CALLOC_STRUCT(swr_scratch_buffers);
- ctx->scratch = scratch;
-}
-
-void
-swr_destroy_scratch_buffers(struct swr_context *ctx)
-{
- struct swr_scratch_buffers *scratch = ctx->scratch;
-
- if (scratch) {
- AlignedFree(scratch->vs_constants.base);
- AlignedFree(scratch->fs_constants.base);
- AlignedFree(scratch->gs_constants.base);
- AlignedFree(scratch->tcs_constants.base);
- AlignedFree(scratch->tes_constants.base);
- AlignedFree(scratch->vertex_buffer.base);
- AlignedFree(scratch->index_buffer.base);
- FREE(scratch);
- }
-}
diff --git a/src/gallium/drivers/swr/swr_scratch.h b/src/gallium/drivers/swr/swr_scratch.h
deleted file mode 100644
index 4d1c82fc6fc..00000000000
--- a/src/gallium/drivers/swr/swr_scratch.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#ifndef SWR_SCRATCH_H
-#define SWR_SCRATCH_H
-
-struct swr_scratch_space {
- void *head;
- unsigned int current_size;
- /* TODO XXX: Add a fence for wrap condition. */
-
- void *base;
-};
-
-struct swr_scratch_buffers {
- struct swr_scratch_space vs_constants;
- struct swr_scratch_space fs_constants;
- struct swr_scratch_space gs_constants;
- struct swr_scratch_space tcs_constants;
- struct swr_scratch_space tes_constants;
- struct swr_scratch_space vertex_buffer;
- struct swr_scratch_space index_buffer;
-};
-
-
-/*
- * swr_copy_to_scratch_space
- * Copies size bytes of user_buffer into the scratch ring buffer.
- * Used to store temporary data such as client arrays and constants.
- *
- * Inputs:
- * space ptr to scratch pool (vs_constants, fs_constants)
- * user_buffer, data to copy into scratch space
- * size to be copied
- * Returns:
- * pointer to data copied to scratch space.
- */
-void *swr_copy_to_scratch_space(struct swr_context *ctx,
- struct swr_scratch_space *space,
- const void *user_buffer,
- unsigned int size);
-
-void swr_init_scratch_buffers(struct swr_context *ctx);
-void swr_destroy_scratch_buffers(struct swr_context *ctx);
-
-#endif
diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
deleted file mode 100644
index 4c274fd86e5..00000000000
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ /dev/null
@@ -1,1155 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#include "swr_context.h"
-#include "swr_public.h"
-#include "swr_screen.h"
-#include "swr_resource.h"
-#include "swr_fence.h"
-#include "gen_knobs.h"
-
-#include "pipe/p_screen.h"
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-#include "util/format/u_format.h"
-#include "util/u_inlines.h"
-#include "util/u_cpu_detect.h"
-#include "util/format/u_format_s3tc.h"
-#include "util/u_string.h"
-#include "util/u_screen.h"
-
-#include "frontend/sw_winsys.h"
-
-#include "jit_api.h"
-
-#include "memory/TilingFunctions.h"
-
-#include <stdio.h>
-#include <map>
-
-/*
- * Max texture sizes
- * XXX Check max texture size values against core and sampler.
- */
-#define SWR_MAX_TEXTURE_SIZE (2 * 1024 * 1024 * 1024ULL) /* 2GB */
-/* Not all texture formats can fit into 2GB limit, but we have to
- live with that. See lp_limits.h for more details */
-#define SWR_MAX_TEXTURE_2D_SIZE 16384
-#define SWR_MAX_TEXTURE_3D_LEVELS 12 /* 2K x 2K x 2K for now */
-#define SWR_MAX_TEXTURE_CUBE_LEVELS 14 /* 8K x 8K for now */
-#define SWR_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */
-
-/* Default max client_copy_limit */
-#define SWR_CLIENT_COPY_LIMIT 8192
-
-/* Flag indicates creation of alternate surface, to prevent recursive loop
- * in resource creation when msaa_force_enable is set. */
-#define SWR_RESOURCE_FLAG_ALT_SURFACE (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
-
-
-static const char *
-swr_get_name(struct pipe_screen *screen)
-{
- static char buf[100];
- snprintf(buf, sizeof(buf), "SWR (LLVM " MESA_LLVM_VERSION_STRING ", %u bits)",
- lp_native_vector_width);
- return buf;
-}
-
-static const char *
-swr_get_vendor(struct pipe_screen *screen)
-{
- return "Intel Corporation";
-}
-
-static bool
-swr_is_format_supported(struct pipe_screen *_screen,
- enum pipe_format format,
- enum pipe_texture_target target,
- unsigned sample_count,
- unsigned storage_sample_count,
- unsigned bind)
-{
- struct swr_screen *screen = swr_screen(_screen);
- struct sw_winsys *winsys = screen->winsys;
- const struct util_format_description *format_desc;
-
- assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D
- || target == PIPE_TEXTURE_1D_ARRAY
- || target == PIPE_TEXTURE_2D
- || target == PIPE_TEXTURE_2D_ARRAY
- || target == PIPE_TEXTURE_RECT
- || target == PIPE_TEXTURE_3D
- || target == PIPE_TEXTURE_CUBE
- || target == PIPE_TEXTURE_CUBE_ARRAY);
-
- if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
- return false;
-
- format_desc = util_format_description(format);
- if (!format_desc)
- return false;
-
- if ((sample_count > screen->msaa_max_count)
- || !util_is_power_of_two_or_zero(sample_count))
- return false;
-
- if (bind & PIPE_BIND_DISPLAY_TARGET) {
- if (!winsys->is_displaytarget_format_supported(winsys, bind, format))
- return false;
- }
-
- if (bind & PIPE_BIND_RENDER_TARGET) {
- if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
- return false;
-
- if (mesa_to_swr_format(format) == (SWR_FORMAT)-1)
- return false;
-
- /*
- * Although possible, it is unnatural to render into compressed or YUV
- * surfaces. So disable these here to avoid going into weird paths
- * inside gallium frontends.
- */
- if (format_desc->block.width != 1 || format_desc->block.height != 1)
- return false;
- }
-
- if (bind & PIPE_BIND_DEPTH_STENCIL) {
- if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
- return false;
-
- if (mesa_to_swr_format(format) == (SWR_FORMAT)-1)
- return false;
- }
-
- if (bind & PIPE_BIND_VERTEX_BUFFER) {
- if (mesa_to_swr_format(format) == (SWR_FORMAT)-1) {
- return false;
- }
- }
-
- if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC ||
- format_desc->layout == UTIL_FORMAT_LAYOUT_FXT1)
- {
- return false;
- }
-
- if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
- format != PIPE_FORMAT_ETC1_RGB8) {
- return false;
- }
-
- if ((bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) &&
- ((bind & PIPE_BIND_DISPLAY_TARGET) == 0)) {
- /* Disable all 3-channel formats, where channel size != 32 bits.
- * In some cases we run into crashes (in generate_unswizzled_blend()),
- * for 3-channel RGB16 variants, there was an apparent LLVM bug.
- * In any case, disabling the shallower 3-channel formats avoids a
- * number of issues with GL_ARB_copy_image support.
- */
- if (format_desc->is_array &&
- format_desc->nr_channels == 3 &&
- format_desc->block.bits != 96) {
- return false;
- }
- }
-
- return TRUE;
-}
-
-static int
-swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
-{
- switch (param) {
- /* limits */
- case PIPE_CAP_MAX_RENDER_TARGETS:
- return PIPE_MAX_COLOR_BUFS;
- case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
- return SWR_MAX_TEXTURE_2D_SIZE;
- case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- return SWR_MAX_TEXTURE_3D_LEVELS;
- case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
- return SWR_MAX_TEXTURE_CUBE_LEVELS;
- case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
- return MAX_SO_STREAMS;
- case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
- case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
- return MAX_ATTRIBUTES * 4;
- case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
- case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
- return 1024;
- case PIPE_CAP_MAX_VERTEX_STREAMS:
- return 4;
- case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
- return 2048;
- case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
- return SWR_MAX_TEXTURE_ARRAY_LAYERS;
- case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
- case PIPE_CAP_MIN_TEXEL_OFFSET:
- return -8;
- case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
- case PIPE_CAP_MAX_TEXEL_OFFSET:
- return 7;
- case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
- return 4;
- case PIPE_CAP_GLSL_FEATURE_LEVEL:
- return 330;
- case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
- return 140;
- case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
- return 16;
- case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
- return 64;
- case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
- return 65536;
- case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
- return 1;
- case PIPE_CAP_MAX_VIEWPORTS:
- return KNOB_NUM_VIEWPORTS_SCISSORS;
- case PIPE_CAP_ENDIANNESS:
- return PIPE_ENDIAN_NATIVE;
-
- /* supported features */
- case PIPE_CAP_NPOT_TEXTURES:
- case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
- case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
- case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
- case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
- case PIPE_CAP_VERTEX_SHADER_SATURATE:
- case PIPE_CAP_POINT_SPRITE:
- case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
- case PIPE_CAP_OCCLUSION_QUERY:
- case PIPE_CAP_QUERY_TIME_ELAPSED:
- case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
- case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
- case PIPE_CAP_TEXTURE_SWIZZLE:
- case PIPE_CAP_BLEND_EQUATION_SEPARATE:
- case PIPE_CAP_INDEP_BLEND_ENABLE:
- case PIPE_CAP_INDEP_BLEND_FUNC:
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
- case PIPE_CAP_DEPTH_CLIP_DISABLE:
- case PIPE_CAP_PRIMITIVE_RESTART:
- case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
- case PIPE_CAP_TGSI_INSTANCEID:
- case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
- case PIPE_CAP_START_INSTANCE:
- case PIPE_CAP_SEAMLESS_CUBE_MAP:
- case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
- case PIPE_CAP_CONDITIONAL_RENDER:
- case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
- case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
- case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
- case PIPE_CAP_USER_VERTEX_BUFFERS:
- case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
- case PIPE_CAP_QUERY_TIMESTAMP:
- case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
- case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
- case PIPE_CAP_DRAW_INDIRECT:
- case PIPE_CAP_UMA:
- case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
- case PIPE_CAP_CLIP_HALFZ:
- case PIPE_CAP_POLYGON_OFFSET_CLAMP:
- case PIPE_CAP_DEPTH_BOUNDS_TEST:
- case PIPE_CAP_CLEAR_TEXTURE:
- case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
- case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
- case PIPE_CAP_CULL_DISTANCE:
- case PIPE_CAP_CUBE_MAP_ARRAY:
- case PIPE_CAP_DOUBLES:
- case PIPE_CAP_TEXTURE_QUERY_LOD:
- case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
- case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE:
- case PIPE_CAP_QUERY_SO_OVERFLOW:
- case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
- case PIPE_CAP_IMAGE_STORE_FORMATTED:
- return 1;
-
- case PIPE_CAP_SHAREABLE_SHADERS:
- return 0;
-
- /* MSAA support
- * If user has explicitly set max_sample_count = 1 (via SWR_MSAA_MAX_COUNT)
- * then disable all MSAA support and go back to old (FAKE_SW_MSAA) caps. */
- case PIPE_CAP_TEXTURE_MULTISAMPLE:
- case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
- return (swr_screen(screen)->msaa_max_count > 1) ? 1 : 0;
- case PIPE_CAP_FAKE_SW_MSAA:
- return (swr_screen(screen)->msaa_max_count > 1) ? 0 : 1;
-
- /* fetch jit change for 2-4GB buffers requires alignment */
- case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
- case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
- case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
- return 1;
-
- /* unsupported features */
- case PIPE_CAP_TEXTURE_TRANSFER_MODES:
- case PIPE_CAP_PCI_GROUP:
- case PIPE_CAP_PCI_BUS:
- case PIPE_CAP_PCI_DEVICE:
- case PIPE_CAP_PCI_FUNCTION:
- case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
- return 0;
- case PIPE_CAP_MAX_GS_INVOCATIONS:
- return 32;
- case PIPE_CAP_MAX_SHADER_BUFFER_SIZE:
- return 1 << 27;
- case PIPE_CAP_MAX_VARYINGS:
- return 32;
-
- case PIPE_CAP_VENDOR_ID:
- return 0xFFFFFFFF;
- case PIPE_CAP_DEVICE_ID:
- return 0xFFFFFFFF;
- case PIPE_CAP_ACCELERATED:
- return 0;
- case PIPE_CAP_VIDEO_MEMORY: {
- /* XXX: Do we want to return the full amount of system memory ? */
- uint64_t system_memory;
-
- if (!os_get_total_physical_memory(&system_memory))
- return 0;
-
- return (int)(system_memory >> 20);
- }
- default:
- return u_pipe_screen_get_param_defaults(screen, param);
- }
-}
-
-static int
-swr_get_shader_param(struct pipe_screen *screen,
- enum pipe_shader_type shader,
- enum pipe_shader_cap param)
-{
- if (shader != PIPE_SHADER_VERTEX &&
- shader != PIPE_SHADER_FRAGMENT &&
- shader != PIPE_SHADER_GEOMETRY &&
- shader != PIPE_SHADER_TESS_CTRL &&
- shader != PIPE_SHADER_TESS_EVAL)
- return 0;
-
- if (param == PIPE_SHADER_CAP_MAX_SHADER_BUFFERS ||
- param == PIPE_SHADER_CAP_MAX_SHADER_IMAGES) {
- return 0;
- }
-
- return gallivm_get_shader_param(param);
-}
-
-
-static float
-swr_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
-{
- switch (param) {
- case PIPE_CAPF_MIN_LINE_WIDTH:
- case PIPE_CAPF_MIN_LINE_WIDTH_AA:
- case PIPE_CAPF_MIN_POINT_SIZE:
- case PIPE_CAPF_MIN_POINT_SIZE_AA:
- return 1;
- case PIPE_CAPF_POINT_SIZE_GRANULARITY:
- case PIPE_CAPF_LINE_WIDTH_GRANULARITY:
- return 0.1;
- case PIPE_CAPF_MAX_LINE_WIDTH:
- case PIPE_CAPF_MAX_LINE_WIDTH_AA:
- case PIPE_CAPF_MAX_POINT_SIZE:
- return 255.0; /* arbitrary */
- case PIPE_CAPF_MAX_POINT_SIZE_AA:
- return 0.0;
- case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
- return 0.0;
- case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
- return 16.0; /* arbitrary */
- case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
- case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
- case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
- return 0.0f;
- }
- /* should only get here on unhandled cases */
- debug_printf("Unexpected PIPE_CAPF %d query\n", param);
- return 0.0;
-}
-
-SWR_FORMAT
-mesa_to_swr_format(enum pipe_format format)
-{
- static const std::map<pipe_format,SWR_FORMAT> mesa2swr = {
- /* depth / stencil */
- {PIPE_FORMAT_Z16_UNORM, R16_UNORM}, // z
- {PIPE_FORMAT_Z32_FLOAT, R32_FLOAT}, // z
- {PIPE_FORMAT_Z24_UNORM_S8_UINT, R24_UNORM_X8_TYPELESS}, // z
- {PIPE_FORMAT_Z24X8_UNORM, R24_UNORM_X8_TYPELESS}, // z
- {PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, R32_FLOAT_X8X24_TYPELESS}, // z
-
- /* alpha */
- {PIPE_FORMAT_A8_UNORM, A8_UNORM},
- {PIPE_FORMAT_A16_UNORM, A16_UNORM},
- {PIPE_FORMAT_A16_FLOAT, A16_FLOAT},
- {PIPE_FORMAT_A32_FLOAT, A32_FLOAT},
-
- /* odd sizes, bgr */
- {PIPE_FORMAT_B5G6R5_UNORM, B5G6R5_UNORM},
- {PIPE_FORMAT_B5G6R5_SRGB, B5G6R5_UNORM_SRGB},
- {PIPE_FORMAT_B5G5R5A1_UNORM, B5G5R5A1_UNORM},
- {PIPE_FORMAT_B5G5R5X1_UNORM, B5G5R5X1_UNORM},
- {PIPE_FORMAT_B4G4R4A4_UNORM, B4G4R4A4_UNORM},
- {PIPE_FORMAT_B8G8R8A8_UNORM, B8G8R8A8_UNORM},
- {PIPE_FORMAT_B8G8R8A8_SRGB, B8G8R8A8_UNORM_SRGB},
- {PIPE_FORMAT_B8G8R8X8_UNORM, B8G8R8X8_UNORM},
- {PIPE_FORMAT_B8G8R8X8_SRGB, B8G8R8X8_UNORM_SRGB},
-
- /* rgb10a2 */
- {PIPE_FORMAT_R10G10B10A2_UNORM, R10G10B10A2_UNORM},
- {PIPE_FORMAT_R10G10B10A2_SNORM, R10G10B10A2_SNORM},
- {PIPE_FORMAT_R10G10B10A2_USCALED, R10G10B10A2_USCALED},
- {PIPE_FORMAT_R10G10B10A2_SSCALED, R10G10B10A2_SSCALED},
- {PIPE_FORMAT_R10G10B10A2_UINT, R10G10B10A2_UINT},
-
- /* rgb10x2 */
- {PIPE_FORMAT_R10G10B10X2_USCALED, R10G10B10X2_USCALED},
-
- /* bgr10a2 */
- {PIPE_FORMAT_B10G10R10A2_UNORM, B10G10R10A2_UNORM},
- {PIPE_FORMAT_B10G10R10A2_SNORM, B10G10R10A2_SNORM},
- {PIPE_FORMAT_B10G10R10A2_USCALED, B10G10R10A2_USCALED},
- {PIPE_FORMAT_B10G10R10A2_SSCALED, B10G10R10A2_SSCALED},
- {PIPE_FORMAT_B10G10R10A2_UINT, B10G10R10A2_UINT},
-
- /* bgr10x2 */
- {PIPE_FORMAT_B10G10R10X2_UNORM, B10G10R10X2_UNORM},
-
- /* r11g11b10 */
- {PIPE_FORMAT_R11G11B10_FLOAT, R11G11B10_FLOAT},
-
- /* 32 bits per component */
- {PIPE_FORMAT_R32_FLOAT, R32_FLOAT},
- {PIPE_FORMAT_R32G32_FLOAT, R32G32_FLOAT},
- {PIPE_FORMAT_R32G32B32_FLOAT, R32G32B32_FLOAT},
- {PIPE_FORMAT_R32G32B32A32_FLOAT, R32G32B32A32_FLOAT},
- {PIPE_FORMAT_R32G32B32X32_FLOAT, R32G32B32X32_FLOAT},
-
- {PIPE_FORMAT_R32_USCALED, R32_USCALED},
- {PIPE_FORMAT_R32G32_USCALED, R32G32_USCALED},
- {PIPE_FORMAT_R32G32B32_USCALED, R32G32B32_USCALED},
- {PIPE_FORMAT_R32G32B32A32_USCALED, R32G32B32A32_USCALED},
-
- {PIPE_FORMAT_R32_SSCALED, R32_SSCALED},
- {PIPE_FORMAT_R32G32_SSCALED, R32G32_SSCALED},
- {PIPE_FORMAT_R32G32B32_SSCALED, R32G32B32_SSCALED},
- {PIPE_FORMAT_R32G32B32A32_SSCALED, R32G32B32A32_SSCALED},
-
- {PIPE_FORMAT_R32_UINT, R32_UINT},
- {PIPE_FORMAT_R32G32_UINT, R32G32_UINT},
- {PIPE_FORMAT_R32G32B32_UINT, R32G32B32_UINT},
- {PIPE_FORMAT_R32G32B32A32_UINT, R32G32B32A32_UINT},
-
- {PIPE_FORMAT_R32_SINT, R32_SINT},
- {PIPE_FORMAT_R32G32_SINT, R32G32_SINT},
- {PIPE_FORMAT_R32G32B32_SINT, R32G32B32_SINT},
- {PIPE_FORMAT_R32G32B32A32_SINT, R32G32B32A32_SINT},
-
- /* 16 bits per component */
- {PIPE_FORMAT_R16_UNORM, R16_UNORM},
- {PIPE_FORMAT_R16G16_UNORM, R16G16_UNORM},
- {PIPE_FORMAT_R16G16B16_UNORM, R16G16B16_UNORM},
- {PIPE_FORMAT_R16G16B16A16_UNORM, R16G16B16A16_UNORM},
- {PIPE_FORMAT_R16G16B16X16_UNORM, R16G16B16X16_UNORM},
-
- {PIPE_FORMAT_R16_USCALED, R16_USCALED},
- {PIPE_FORMAT_R16G16_USCALED, R16G16_USCALED},
- {PIPE_FORMAT_R16G16B16_USCALED, R16G16B16_USCALED},
- {PIPE_FORMAT_R16G16B16A16_USCALED, R16G16B16A16_USCALED},
-
- {PIPE_FORMAT_R16_SNORM, R16_SNORM},
- {PIPE_FORMAT_R16G16_SNORM, R16G16_SNORM},
- {PIPE_FORMAT_R16G16B16_SNORM, R16G16B16_SNORM},
- {PIPE_FORMAT_R16G16B16A16_SNORM, R16G16B16A16_SNORM},
-
- {PIPE_FORMAT_R16_SSCALED, R16_SSCALED},
- {PIPE_FORMAT_R16G16_SSCALED, R16G16_SSCALED},
- {PIPE_FORMAT_R16G16B16_SSCALED, R16G16B16_SSCALED},
- {PIPE_FORMAT_R16G16B16A16_SSCALED, R16G16B16A16_SSCALED},
-
- {PIPE_FORMAT_R16_UINT, R16_UINT},
- {PIPE_FORMAT_R16G16_UINT, R16G16_UINT},
- {PIPE_FORMAT_R16G16B16_UINT, R16G16B16_UINT},
- {PIPE_FORMAT_R16G16B16A16_UINT, R16G16B16A16_UINT},
-
- {PIPE_FORMAT_R16_SINT, R16_SINT},
- {PIPE_FORMAT_R16G16_SINT, R16G16_SINT},
- {PIPE_FORMAT_R16G16B16_SINT, R16G16B16_SINT},
- {PIPE_FORMAT_R16G16B16A16_SINT, R16G16B16A16_SINT},
-
- {PIPE_FORMAT_R16_FLOAT, R16_FLOAT},
- {PIPE_FORMAT_R16G16_FLOAT, R16G16_FLOAT},
- {PIPE_FORMAT_R16G16B16_FLOAT, R16G16B16_FLOAT},
- {PIPE_FORMAT_R16G16B16A16_FLOAT, R16G16B16A16_FLOAT},
- {PIPE_FORMAT_R16G16B16X16_FLOAT, R16G16B16X16_FLOAT},
-
- /* 8 bits per component */
- {PIPE_FORMAT_R8_UNORM, R8_UNORM},
- {PIPE_FORMAT_R8G8_UNORM, R8G8_UNORM},
- {PIPE_FORMAT_R8G8B8_UNORM, R8G8B8_UNORM},
- {PIPE_FORMAT_R8G8B8_SRGB, R8G8B8_UNORM_SRGB},
- {PIPE_FORMAT_R8G8B8A8_UNORM, R8G8B8A8_UNORM},
- {PIPE_FORMAT_R8G8B8A8_SRGB, R8G8B8A8_UNORM_SRGB},
- {PIPE_FORMAT_R8G8B8X8_UNORM, R8G8B8X8_UNORM},
- {PIPE_FORMAT_R8G8B8X8_SRGB, R8G8B8X8_UNORM_SRGB},
-
- {PIPE_FORMAT_R8_USCALED, R8_USCALED},
- {PIPE_FORMAT_R8G8_USCALED, R8G8_USCALED},
- {PIPE_FORMAT_R8G8B8_USCALED, R8G8B8_USCALED},
- {PIPE_FORMAT_R8G8B8A8_USCALED, R8G8B8A8_USCALED},
-
- {PIPE_FORMAT_R8_SNORM, R8_SNORM},
- {PIPE_FORMAT_R8G8_SNORM, R8G8_SNORM},
- {PIPE_FORMAT_R8G8B8_SNORM, R8G8B8_SNORM},
- {PIPE_FORMAT_R8G8B8A8_SNORM, R8G8B8A8_SNORM},
-
- {PIPE_FORMAT_R8_SSCALED, R8_SSCALED},
- {PIPE_FORMAT_R8G8_SSCALED, R8G8_SSCALED},
- {PIPE_FORMAT_R8G8B8_SSCALED, R8G8B8_SSCALED},
- {PIPE_FORMAT_R8G8B8A8_SSCALED, R8G8B8A8_SSCALED},
-
- {PIPE_FORMAT_R8_UINT, R8_UINT},
- {PIPE_FORMAT_R8G8_UINT, R8G8_UINT},
- {PIPE_FORMAT_R8G8B8_UINT, R8G8B8_UINT},
- {PIPE_FORMAT_R8G8B8A8_UINT, R8G8B8A8_UINT},
-
- {PIPE_FORMAT_R8_SINT, R8_SINT},
- {PIPE_FORMAT_R8G8_SINT, R8G8_SINT},
- {PIPE_FORMAT_R8G8B8_SINT, R8G8B8_SINT},
- {PIPE_FORMAT_R8G8B8A8_SINT, R8G8B8A8_SINT},
-
- /* These formats are valid for vertex data, but should not be used
- * for render targets.
- */
-
- {PIPE_FORMAT_R32_FIXED, R32_SFIXED},
- {PIPE_FORMAT_R32G32_FIXED, R32G32_SFIXED},
- {PIPE_FORMAT_R32G32B32_FIXED, R32G32B32_SFIXED},
- {PIPE_FORMAT_R32G32B32A32_FIXED, R32G32B32A32_SFIXED},
-
- {PIPE_FORMAT_R64_FLOAT, R64_FLOAT},
- {PIPE_FORMAT_R64G64_FLOAT, R64G64_FLOAT},
- {PIPE_FORMAT_R64G64B64_FLOAT, R64G64B64_FLOAT},
- {PIPE_FORMAT_R64G64B64A64_FLOAT, R64G64B64A64_FLOAT},
-
- /* These formats have entries in SWR but don't have Load/StoreTile
- * implementations. That means these aren't renderable, and thus having
- * a mapping entry here is detrimental.
- */
- /*
-
- {PIPE_FORMAT_L8_UNORM, L8_UNORM},
- {PIPE_FORMAT_I8_UNORM, I8_UNORM},
- {PIPE_FORMAT_L8A8_UNORM, L8A8_UNORM},
- {PIPE_FORMAT_L16_UNORM, L16_UNORM},
- {PIPE_FORMAT_UYVY, YCRCB_SWAPUVY},
-
- {PIPE_FORMAT_L8_SRGB, L8_UNORM_SRGB},
- {PIPE_FORMAT_L8A8_SRGB, L8A8_UNORM_SRGB},
-
- {PIPE_FORMAT_DXT1_RGBA, BC1_UNORM},
- {PIPE_FORMAT_DXT3_RGBA, BC2_UNORM},
- {PIPE_FORMAT_DXT5_RGBA, BC3_UNORM},
-
- {PIPE_FORMAT_DXT1_SRGBA, BC1_UNORM_SRGB},
- {PIPE_FORMAT_DXT3_SRGBA, BC2_UNORM_SRGB},
- {PIPE_FORMAT_DXT5_SRGBA, BC3_UNORM_SRGB},
-
- {PIPE_FORMAT_RGTC1_UNORM, BC4_UNORM},
- {PIPE_FORMAT_RGTC1_SNORM, BC4_SNORM},
- {PIPE_FORMAT_RGTC2_UNORM, BC5_UNORM},
- {PIPE_FORMAT_RGTC2_SNORM, BC5_SNORM},
-
- {PIPE_FORMAT_L16A16_UNORM, L16A16_UNORM},
- {PIPE_FORMAT_I16_UNORM, I16_UNORM},
- {PIPE_FORMAT_L16_FLOAT, L16_FLOAT},
- {PIPE_FORMAT_L16A16_FLOAT, L16A16_FLOAT},
- {PIPE_FORMAT_I16_FLOAT, I16_FLOAT},
- {PIPE_FORMAT_L32_FLOAT, L32_FLOAT},
- {PIPE_FORMAT_L32A32_FLOAT, L32A32_FLOAT},
- {PIPE_FORMAT_I32_FLOAT, I32_FLOAT},
-
- {PIPE_FORMAT_I8_UINT, I8_UINT},
- {PIPE_FORMAT_L8_UINT, L8_UINT},
- {PIPE_FORMAT_L8A8_UINT, L8A8_UINT},
-
- {PIPE_FORMAT_I8_SINT, I8_SINT},
- {PIPE_FORMAT_L8_SINT, L8_SINT},
- {PIPE_FORMAT_L8A8_SINT, L8A8_SINT},
-
- */
- };
-
- auto it = mesa2swr.find(format);
- if (it == mesa2swr.end())
- return (SWR_FORMAT)-1;
- else
- return it->second;
-}
-
-static bool
-swr_displaytarget_layout(struct swr_screen *screen, struct swr_resource *res)
-{
- struct sw_winsys *winsys = screen->winsys;
- struct sw_displaytarget *dt;
-
- const unsigned width = align(res->swr.width, res->swr.halign);
- const unsigned height = align(res->swr.height, res->swr.valign);
-
- UINT stride;
- dt = winsys->displaytarget_create(winsys,
- res->base.bind,
- res->base.format,
- width, height,
- 64, NULL,
- &stride);
-
- if (dt == NULL)
- return false;
-
- void *map = winsys->displaytarget_map(winsys, dt, 0);
-
- res->display_target = dt;
- res->swr.xpBaseAddress = (gfxptr_t)map;
-
- /* Clear the display target surface */
- if (map)
- memset(map, 0, height * stride);
-
- winsys->displaytarget_unmap(winsys, dt);
-
- return true;
-}
-
-static bool
-swr_texture_layout(struct swr_screen *screen,
- struct swr_resource *res,
- bool allocate)
-{
- struct pipe_resource *pt = &res->base;
-
- pipe_format fmt = pt->format;
- const struct util_format_description *desc = util_format_description(fmt);
-
- res->has_depth = util_format_has_depth(desc);
- res->has_stencil = util_format_has_stencil(desc);
-
- if (res->has_stencil && !res->has_depth)
- fmt = PIPE_FORMAT_R8_UINT;
-
- /* We always use the SWR layout. For 2D and 3D textures this looks like:
- *
- * |<------- pitch ------->|
- * +=======================+-------
- * |Array 0 | ^
- * | | |
- * | Level 0 | |
- * | | |
- * | | qpitch
- * +-----------+-----------+ |
- * | | L2L2L2L2 | |
- * | Level 1 | L3L3 | |
- * | | L4 | v
- * +===========+===========+-------
- * |Array 1 |
- * | |
- * | Level 0 |
- * | |
- * | |
- * +-----------+-----------+
- * | | L2L2L2L2 |
- * | Level 1 | L3L3 |
- * | | L4 |
- * +===========+===========+
- *
- * The overall width in bytes is known as the pitch, while the overall
- * height in rows is the qpitch. Array slices are laid out logically below
- * one another, qpitch rows apart. For 3D surfaces, the "level" values are
- * just invalid for the higher array numbers (since depth is also
- * minified). 1D and 1D array surfaces are stored effectively the same way,
- * except that pitch never plays into it. All the levels are logically
- * adjacent to each other on the X axis. The qpitch becomes the number of
- * elements between array slices, while the pitch is unused.
- *
- * Each level's sizes are subject to the valign and halign settings of the
- * surface. For compressed formats that swr is unaware of, we will use an
- * appropriately-sized uncompressed format, and scale the widths/heights.
- *
- * This surface is stored inside res->swr. For depth/stencil textures,
- * res->secondary will have an identically-laid-out but R8_UINT-formatted
- * stencil tree. In the Z32F_S8 case, the primary surface still has 64-bpp
- * texels, to simplify map/unmap logic which copies the stencil values
- * in/out.
- */
-
- res->swr.width = pt->width0;
- res->swr.height = pt->height0;
- res->swr.type = swr_convert_target_type(pt->target);
- res->swr.tileMode = SWR_TILE_NONE;
- res->swr.format = mesa_to_swr_format(fmt);
- res->swr.numSamples = std::max(1u, pt->nr_samples);
-
- if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) {
- res->swr.halign = KNOB_MACROTILE_X_DIM;
- res->swr.valign = KNOB_MACROTILE_Y_DIM;
-
- /* If SWR_MSAA_FORCE_ENABLE is set, turn on MSAA and override requested
- * surface sample count. */
- if (screen->msaa_force_enable) {
- res->swr.numSamples = screen->msaa_max_count;
- swr_print_info("swr_texture_layout: forcing sample count: %d\n",
- res->swr.numSamples);
- }
- } else {
- res->swr.halign = 1;
- res->swr.valign = 1;
- }
-
- unsigned halign = res->swr.halign * util_format_get_blockwidth(fmt);
- unsigned width = align(pt->width0, halign);
- if (pt->target == PIPE_TEXTURE_1D || pt->target == PIPE_TEXTURE_1D_ARRAY) {
- for (int level = 1; level <= pt->last_level; level++)
- width += align(u_minify(pt->width0, level), halign);
- res->swr.pitch = util_format_get_blocksize(fmt);
- res->swr.qpitch = util_format_get_nblocksx(fmt, width);
- } else {
- // The pitch is the overall width of the texture in bytes. Most of the
- // time this is the pitch of level 0 since all the other levels fit
- // underneath it. However in some degenerate situations, the width of
- // level1 + level2 may be larger. In that case, we use those
- // widths. This can happen if, e.g. halign is 32, and the width of level
- // 0 is 32 or less. In that case, the aligned levels 1 and 2 will also
- // be 32 each, adding up to 64.
- unsigned valign = res->swr.valign * util_format_get_blockheight(fmt);
- if (pt->last_level > 1) {
- width = std::max<uint32_t>(
- width,
- align(u_minify(pt->width0, 1), halign) +
- align(u_minify(pt->width0, 2), halign));
- }
- res->swr.pitch = util_format_get_stride(fmt, width);
-
- // The qpitch is controlled by either the height of the second LOD, or
- // the combination of all the later LODs.
- unsigned height = align(pt->height0, valign);
- if (pt->last_level == 1) {
- height += align(u_minify(pt->height0, 1), valign);
- } else if (pt->last_level > 1) {
- unsigned level1 = align(u_minify(pt->height0, 1), valign);
- unsigned level2 = 0;
- for (int level = 2; level <= pt->last_level; level++) {
- level2 += align(u_minify(pt->height0, level), valign);
- }
- height += std::max(level1, level2);
- }
- res->swr.qpitch = util_format_get_nblocksy(fmt, height);
- }
-
- if (pt->target == PIPE_TEXTURE_3D)
- res->swr.depth = pt->depth0;
- else
- res->swr.depth = pt->array_size;
-
- // Fix up swr format if necessary so that LOD offset computation works
- if (res->swr.format == (SWR_FORMAT)-1) {
- switch (util_format_get_blocksize(fmt)) {
- default:
- unreachable("Unexpected format block size");
- case 1: res->swr.format = R8_UINT; break;
- case 2: res->swr.format = R16_UINT; break;
- case 4: res->swr.format = R32_UINT; break;
- case 8:
- if (util_format_is_compressed(fmt))
- res->swr.format = BC4_UNORM;
- else
- res->swr.format = R32G32_UINT;
- break;
- case 16:
- if (util_format_is_compressed(fmt))
- res->swr.format = BC5_UNORM;
- else
- res->swr.format = R32G32B32A32_UINT;
- break;
- }
- }
-
- for (int level = 0; level <= pt->last_level; level++) {
- res->mip_offsets[level] =
- ComputeSurfaceOffset<false>(0, 0, 0, 0, 0, level, &res->swr);
- }
-
- size_t total_size = (uint64_t)res->swr.depth * res->swr.qpitch *
- res->swr.pitch * res->swr.numSamples;
-
- // Let non-sampled textures (e.g. buffer objects) bypass the size limit
- if (swr_resource_is_texture(&res->base) && total_size > SWR_MAX_TEXTURE_SIZE)
- return false;
-
- if (allocate) {
- res->swr.xpBaseAddress = (gfxptr_t)AlignedMalloc(total_size, 64);
- if (!res->swr.xpBaseAddress)
- return false;
-
- if (res->has_depth && res->has_stencil) {
- res->secondary = res->swr;
- res->secondary.format = R8_UINT;
- res->secondary.pitch = res->swr.pitch / util_format_get_blocksize(fmt);
-
- for (int level = 0; level <= pt->last_level; level++) {
- res->secondary_mip_offsets[level] =
- ComputeSurfaceOffset<false>(0, 0, 0, 0, 0, level, &res->secondary);
- }
-
- total_size = res->secondary.depth * res->secondary.qpitch *
- res->secondary.pitch * res->secondary.numSamples;
-
- res->secondary.xpBaseAddress = (gfxptr_t) AlignedMalloc(total_size, 64);
- if (!res->secondary.xpBaseAddress) {
- AlignedFree((void *)res->swr.xpBaseAddress);
- return false;
- }
- }
- }
-
- return true;
-}
-
-static bool
-swr_can_create_resource(struct pipe_screen *screen,
- const struct pipe_resource *templat)
-{
- struct swr_resource res;
- memset(&res, 0, sizeof(res));
- res.base = *templat;
- return swr_texture_layout(swr_screen(screen), &res, false);
-}
-
-/* Helper function that conditionally creates a single-sample resolve resource
- * and attaches it to main multisample resource. */
-static bool
-swr_create_resolve_resource(struct pipe_screen *_screen,
- struct swr_resource *msaa_res)
-{
- struct swr_screen *screen = swr_screen(_screen);
-
- /* If resource is multisample, create a single-sample resolve resource */
- if (msaa_res->base.nr_samples > 1 || (screen->msaa_force_enable &&
- !(msaa_res->base.flags & SWR_RESOURCE_FLAG_ALT_SURFACE))) {
-
- /* Create a single-sample copy of the resource. Copy the original
- * resource parameters and set flag to prevent recursion when re-calling
- * resource_create */
- struct pipe_resource alt_template = msaa_res->base;
- alt_template.nr_samples = 0;
- alt_template.flags |= SWR_RESOURCE_FLAG_ALT_SURFACE;
-
- /* Note: Display_target is a special single-sample resource, only the
- * display_target has been created already. */
- if (msaa_res->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT
- | PIPE_BIND_SHARED)) {
- /* Allocate the multisample buffers. */
- if (!swr_texture_layout(screen, msaa_res, true))
- return false;
-
- /* Alt resource will only be bound as PIPE_BIND_RENDER_TARGET
- * remove the DISPLAY_TARGET, SCANOUT, and SHARED bindings */
- alt_template.bind = PIPE_BIND_RENDER_TARGET;
- }
-
- /* Allocate single-sample resolve surface */
- struct pipe_resource *alt;
- alt = _screen->resource_create(_screen, &alt_template);
- if (!alt)
- return false;
-
- /* Attach it to the multisample resource */
- msaa_res->resolve_target = alt;
-
- /* Hang resolve surface state off the multisample surface state to so
- * StoreTiles knows where to resolve the surface. */
- msaa_res->swr.xpAuxBaseAddress = (gfxptr_t)&swr_resource(alt)->swr;
- }
-
- return true; /* success */
-}
-
-static struct pipe_resource *
-swr_resource_create(struct pipe_screen *_screen,
- const struct pipe_resource *templat)
-{
- struct swr_screen *screen = swr_screen(_screen);
- struct swr_resource *res = CALLOC_STRUCT(swr_resource);
- if (!res)
- return NULL;
-
- res->base = *templat;
- pipe_reference_init(&res->base.reference, 1);
- res->base.screen = &screen->base;
-
- if (swr_resource_is_texture(&res->base)) {
- if (res->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT
- | PIPE_BIND_SHARED)) {
- /* displayable surface
- * first call swr_texture_layout without allocating to finish
- * filling out the SWR_SURFACE_STATE in res */
- swr_texture_layout(screen, res, false);
- if (!swr_displaytarget_layout(screen, res))
- goto fail;
- } else {
- /* texture map */
- if (!swr_texture_layout(screen, res, true))
- goto fail;
- }
-
- /* If resource was multisample, create resolve resource and attach
- * it to multisample resource. */
- if (!swr_create_resolve_resource(_screen, res))
- goto fail;
-
- } else {
- /* other data (vertex buffer, const buffer, etc) */
- assert(util_format_get_blocksize(templat->format) == 1);
- assert(templat->height0 == 1);
- assert(templat->depth0 == 1);
- assert(templat->last_level == 0);
-
- /* Easiest to just call swr_texture_layout, as it sets up
- * SWR_SURFACE_STATE in res */
- if (!swr_texture_layout(screen, res, true))
- goto fail;
- }
-
- return &res->base;
-
-fail:
- FREE(res);
- return NULL;
-}
-
-static void
-swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt)
-{
- struct swr_screen *screen = swr_screen(p_screen);
- struct swr_resource *spr = swr_resource(pt);
-
- if (spr->display_target) {
- /* If resource is display target, winsys manages the buffer and will
- * free it on displaytarget_destroy. */
- swr_fence_finish(p_screen, NULL, screen->flush_fence, 0);
-
- struct sw_winsys *winsys = screen->winsys;
- winsys->displaytarget_destroy(winsys, spr->display_target);
-
- if (spr->swr.numSamples > 1) {
- /* Free an attached resolve resource */
- struct swr_resource *alt = swr_resource(spr->resolve_target);
- swr_fence_work_free(screen->flush_fence, (void*)(alt->swr.xpBaseAddress), true);
-
- /* Free multisample buffer */
- swr_fence_work_free(screen->flush_fence, (void*)(spr->swr.xpBaseAddress), true);
- }
- } else {
- /* For regular resources, defer deletion */
- swr_resource_unused(pt);
-
- if (spr->swr.numSamples > 1) {
- /* Free an attached resolve resource */
- struct swr_resource *alt = swr_resource(spr->resolve_target);
- swr_fence_work_free(screen->flush_fence, (void*)(alt->swr.xpBaseAddress), true);
- }
-
- swr_fence_work_free(screen->flush_fence, (void*)(spr->swr.xpBaseAddress), true);
- swr_fence_work_free(screen->flush_fence,
- (void*)(spr->secondary.xpBaseAddress), true);
-
- /* If work queue grows too large, submit a fence to force queue to
- * drain. This is mainly to decrease the amount of memory used by the
- * piglit streaming-texture-leak test */
- if (screen->pipe && swr_fence(screen->flush_fence)->work.count > 64)
- swr_fence_submit(swr_context(screen->pipe), screen->flush_fence);
- }
-
- FREE(spr);
-}
-
-
-static void
-swr_flush_frontbuffer(struct pipe_screen *p_screen,
- struct pipe_context *pipe,
- struct pipe_resource *resource,
- unsigned level,
- unsigned layer,
- void *context_private,
- struct pipe_box *sub_box)
-{
- struct swr_screen *screen = swr_screen(p_screen);
- struct sw_winsys *winsys = screen->winsys;
- struct swr_resource *spr = swr_resource(resource);
- struct swr_context *ctx = swr_context(pipe);
-
- if (pipe) {
- swr_fence_finish(p_screen, NULL, screen->flush_fence, 0);
- swr_resource_unused(resource);
- ctx->api.pfnSwrEndFrame(ctx->swrContext);
- }
-
- /* Multisample resolved into resolve_target at flush with store_resource */
- if (pipe && spr->swr.numSamples > 1) {
- struct pipe_resource *resolve_target = spr->resolve_target;
-
- /* Once resolved, copy into display target */
- SWR_SURFACE_STATE *resolve = &swr_resource(resolve_target)->swr;
-
- void *map = winsys->displaytarget_map(winsys, spr->display_target,
- PIPE_MAP_WRITE);
- memcpy(map, (void*)(resolve->xpBaseAddress), resolve->pitch * resolve->height);
- winsys->displaytarget_unmap(winsys, spr->display_target);
- }
-
- debug_assert(spr->display_target);
- if (spr->display_target)
- winsys->displaytarget_display(
- winsys, spr->display_target, context_private, sub_box);
-}
-
-
-void
-swr_destroy_screen_internal(struct swr_screen **screen)
-{
- struct pipe_screen *p_screen = &(*screen)->base;
-
- swr_fence_finish(p_screen, NULL, (*screen)->flush_fence, 0);
- swr_fence_reference(p_screen, &(*screen)->flush_fence, NULL);
-
- JitDestroyContext((*screen)->hJitMgr);
-
- if ((*screen)->pLibrary)
- util_dl_close((*screen)->pLibrary);
-
- FREE(*screen);
- *screen = NULL;
-}
-
-
-static void
-swr_destroy_screen(struct pipe_screen *p_screen)
-{
- struct swr_screen *screen = swr_screen(p_screen);
- struct sw_winsys *winsys = screen->winsys;
-
- swr_print_info("SWR destroy screen!\n");
-
- if (winsys->destroy)
- winsys->destroy(winsys);
-
- swr_destroy_screen_internal(&screen);
-}
-
-
-static void
-swr_validate_env_options(struct swr_screen *screen)
-{
- /* The client_copy_limit sets a maximum on the amount of user-buffer memory
- * copied to scratch space on a draw. Past this, the draw will access
- * user-buffer directly and then block. This is faster than queuing many
- * large client draws. */
- screen->client_copy_limit = SWR_CLIENT_COPY_LIMIT;
- int client_copy_limit =
- debug_get_num_option("SWR_CLIENT_COPY_LIMIT", SWR_CLIENT_COPY_LIMIT);
- if (client_copy_limit > 0)
- screen->client_copy_limit = client_copy_limit;
-
- /* XXX msaa under development, disable by default for now */
- screen->msaa_max_count = 1; /* was SWR_MAX_NUM_MULTISAMPLES; */
-
- /* validate env override values, within range and power of 2 */
- int msaa_max_count = debug_get_num_option("SWR_MSAA_MAX_COUNT", 1);
- if (msaa_max_count != 1) {
- if ((msaa_max_count < 1) || (msaa_max_count > SWR_MAX_NUM_MULTISAMPLES)
- || !util_is_power_of_two_or_zero(msaa_max_count)) {
- fprintf(stderr, "SWR_MSAA_MAX_COUNT invalid: %d\n", msaa_max_count);
- fprintf(stderr, "must be power of 2 between 1 and %d" \
- " (or 1 to disable msaa)\n",
- SWR_MAX_NUM_MULTISAMPLES);
- fprintf(stderr, "(msaa disabled)\n");
- msaa_max_count = 1;
- }
-
- swr_print_info("SWR_MSAA_MAX_COUNT: %d\n", msaa_max_count);
-
- screen->msaa_max_count = msaa_max_count;
- }
-
- screen->msaa_force_enable = debug_get_bool_option(
- "SWR_MSAA_FORCE_ENABLE", false);
- if (screen->msaa_force_enable)
- swr_print_info("SWR_MSAA_FORCE_ENABLE: true\n");
-}
-
-
-struct pipe_screen *
-swr_create_screen_internal(struct sw_winsys *winsys)
-{
- struct swr_screen *screen = CALLOC_STRUCT(swr_screen);
-
- if (!screen)
- return NULL;
-
- if (!lp_build_init()) {
- FREE(screen);
- return NULL;
- }
-
- screen->winsys = winsys;
- screen->base.get_name = swr_get_name;
- screen->base.get_vendor = swr_get_vendor;
- screen->base.is_format_supported = swr_is_format_supported;
- screen->base.context_create = swr_create_context;
- screen->base.can_create_resource = swr_can_create_resource;
-
- screen->base.destroy = swr_destroy_screen;
- screen->base.get_param = swr_get_param;
- screen->base.get_shader_param = swr_get_shader_param;
- screen->base.get_paramf = swr_get_paramf;
-
- screen->base.resource_create = swr_resource_create;
- screen->base.resource_destroy = swr_resource_destroy;
-
- screen->base.flush_frontbuffer = swr_flush_frontbuffer;
-
- // Pass in "" for architecture for run-time determination
- screen->hJitMgr = JitCreateContext(KNOB_SIMD_WIDTH, "", "swr");
-
- swr_fence_init(&screen->base);
-
- swr_validate_env_options(screen);
-
- return &screen->base;
-}
diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h
deleted file mode 100644
index e66f5443357..00000000000
--- a/src/gallium/drivers/swr/swr_screen.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#ifndef SWR_SCREEN_H
-#define SWR_SCREEN_H
-
-#include "swr_resource.h"
-
-#include "pipe/p_screen.h"
-#include "pipe/p_defines.h"
-#include "util/u_dl.h"
-#include "util/format/u_format.h"
-#include "api.h"
-
-#include "memory/TilingFunctions.h"
-#include "memory/InitMemory.h"
-#include <stdio.h>
-#include <stdarg.h>
-
-struct sw_winsys;
-
-struct swr_screen {
- struct pipe_screen base;
- struct pipe_context *pipe;
-
- struct pipe_fence_handle *flush_fence;
-
- struct sw_winsys *winsys;
-
- /* Configurable environment settings */
- bool msaa_force_enable;
- uint8_t msaa_max_count;
- uint32_t client_copy_limit;
-
- HANDLE hJitMgr;
-
- /* Dynamic backend implementations */
- util_dl_library *pLibrary;
- PFNSwrGetInterface pfnSwrGetInterface;
- PFNSwrGetTileInterface pfnSwrGetTileInterface;
-
- /* Do we run on Xeon Phi? */
- bool is_knl;
-};
-
-static INLINE struct swr_screen *
-swr_screen(struct pipe_screen *pipe)
-{
- return (struct swr_screen *)pipe;
-}
-
-SWR_FORMAT
-mesa_to_swr_format(enum pipe_format format);
-
-INLINE void swr_print_info(const char *format, ...)
-{
- static bool print_info = debug_get_bool_option("SWR_PRINT_INFO", false);
- if(print_info) {
- va_list args;
- va_start(args, format);
- vfprintf(stderr, format, args);
- va_end(args);
- }
-}
-
-#endif
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
deleted file mode 100644
index 315036920fb..00000000000
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ /dev/null
@@ -1,3040 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#include <llvm/Config/llvm-config.h>
-
-#if LLVM_VERSION_MAJOR < 7
-// llvm redefines DEBUG
-#pragma push_macro("DEBUG")
-#undef DEBUG
-#endif
-
-#include "JitManager.h"
-#include "llvm-c/Core.h"
-#include "llvm/Support/CBindingWrapping.h"
-#include "llvm/IR/LegacyPassManager.h"
-
-#if LLVM_VERSION_MAJOR < 7
-#pragma pop_macro("DEBUG")
-#endif
-
-#include "state.h"
-#include "gen_state_llvm.h"
-#include "builder.h"
-#include "functionpasses/passes.h"
-
-#include "tgsi/tgsi_strings.h"
-#include "util/format/u_format.h"
-#include "util/u_prim.h"
-#include "gallivm/lp_bld_init.h"
-#include "gallivm/lp_bld_flow.h"
-#include "gallivm/lp_bld_struct.h"
-#include "gallivm/lp_bld_tgsi.h"
-#include "gallivm/lp_bld_const.h"
-#include "gallivm/lp_bld_printf.h"
-#include "gallivm/lp_bld_logic.h"
-
-#include "swr_context.h"
-#include "gen_surf_state_llvm.h"
-#include "gen_swr_context_llvm.h"
-#include "swr_resource.h"
-#include "swr_state.h"
-#include "swr_screen.h"
-
-
-/////////////////////////////////////////////////////////////////////////
-
-#include <stdio.h>
-#include <inttypes.h>
-
-#include "util/u_debug.h"
-#include "util/u_memory.h"
-#include "util/u_string.h"
-
-#include "gallivm/lp_bld_type.h"
-
-#if defined(DEBUG) && defined(SWR_VERBOSE_SHADER)
-constexpr bool verbose_shader = true;
-constexpr bool verbose_tcs_shader_in = true;
-constexpr bool verbose_tcs_shader_out = true;
-constexpr bool verbose_tcs_shader_loop = true;
-constexpr bool verbose_vs_shader = true;
-#else
-constexpr bool verbose_shader = false;
-constexpr bool verbose_tcs_shader_in = false;
-constexpr bool verbose_tcs_shader_out = false;
-constexpr bool verbose_tcs_shader_loop = false;
-constexpr bool verbose_vs_shader = false;
-#endif
-
-using namespace SwrJit;
-
-static unsigned
-locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
-
-bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
-{
- return !memcmp(&lhs, &rhs, sizeof(lhs));
-}
-
-bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs)
-{
- return !memcmp(&lhs, &rhs, sizeof(lhs));
-}
-
-bool operator==(const swr_jit_fetch_key &lhs, const swr_jit_fetch_key &rhs)
-{
- return !memcmp(&lhs, &rhs, sizeof(lhs));
-}
-
-bool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs)
-{
- return !memcmp(&lhs, &rhs, sizeof(lhs));
-}
-
-bool operator==(const swr_jit_tcs_key &lhs, const swr_jit_tcs_key &rhs)
-{
- return !memcmp(&lhs, &rhs, sizeof(lhs));
-}
-
-bool operator==(const swr_jit_tes_key &lhs, const swr_jit_tes_key &rhs)
-{
- return !memcmp(&lhs, &rhs, sizeof(lhs));
-}
-
-
-static void
-swr_generate_sampler_key(const struct lp_tgsi_info &info,
- struct swr_context *ctx,
- enum pipe_shader_type shader_type,
- struct swr_jit_sampler_key &key)
-{
- key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1;
-
- for (unsigned i = 0; i < key.nr_samplers; i++) {
- if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
- lp_sampler_static_sampler_state(
- &key.sampler[i].sampler_state,
- ctx->samplers[shader_type][i]);
- }
- }
-
- /*
- * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
- * are dx10-style? Can't really have mixed opcodes, at least not
- * if we want to skip the holes here (without rescanning tgsi).
- */
- if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
- key.nr_sampler_views =
- info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
- for (unsigned i = 0; i < key.nr_sampler_views; i++) {
- if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) {
- const struct pipe_sampler_view *view =
- ctx->sampler_views[shader_type][i];
- lp_sampler_static_texture_state(
- &key.sampler[i].texture_state, view);
- if (view) {
- struct swr_resource *swr_res = swr_resource(view->texture);
- const struct util_format_description *desc =
- util_format_description(view->format);
- if (swr_res->has_depth && swr_res->has_stencil &&
- !util_format_has_depth(desc))
- key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
- }
- }
- }
- } else {
- key.nr_sampler_views = key.nr_samplers;
- for (unsigned i = 0; i < key.nr_sampler_views; i++) {
- if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
- const struct pipe_sampler_view *view =
- ctx->sampler_views[shader_type][i];
- lp_sampler_static_texture_state(
- &key.sampler[i].texture_state, view);
- if (view) {
- struct swr_resource *swr_res = swr_resource(view->texture);
- const struct util_format_description *desc =
- util_format_description(view->format);
- if (swr_res->has_depth && swr_res->has_stencil &&
- !util_format_has_depth(desc))
- key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
- }
- }
- }
- }
-}
-
-void
-swr_generate_fs_key(struct swr_jit_fs_key &key,
- struct swr_context *ctx,
- swr_fragment_shader *swr_fs)
-{
- memset((void*)&key, 0, sizeof(key));
-
- key.nr_cbufs = ctx->framebuffer.nr_cbufs;
- key.light_twoside = ctx->rasterizer->light_twoside;
- key.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable;
-
- struct tgsi_shader_info *pPrevShader;
- if (ctx->gs)
- pPrevShader = &ctx->gs->info.base;
- else if (ctx->tes)
- pPrevShader = &ctx->tes->info.base;
- else
- pPrevShader = &ctx->vs->info.base;
-
- memcpy(&key.vs_output_semantic_name,
- &pPrevShader->output_semantic_name,
- sizeof(key.vs_output_semantic_name));
- memcpy(&key.vs_output_semantic_idx,
- &pPrevShader->output_semantic_index,
- sizeof(key.vs_output_semantic_idx));
-
- swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key);
-
- key.poly_stipple_enable = ctx->rasterizer->poly_stipple_enable &&
- ctx->poly_stipple.prim_is_poly;
-}
-
-void
-swr_generate_vs_key(struct swr_jit_vs_key &key,
- struct swr_context *ctx,
- swr_vertex_shader *swr_vs)
-{
- memset((void*)&key, 0, sizeof(key));
-
- key.clip_plane_mask =
- swr_vs->info.base.clipdist_writemask ?
- swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
- ctx->rasterizer->clip_plane_enable;
-
- swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
-}
-
-void
-swr_generate_fetch_key(struct swr_jit_fetch_key &key,
- struct swr_vertex_element_state *velems)
-{
- memset((void*)&key, 0, sizeof(key));
-
- key.fsState = velems->fsState;
-}
-
-void
-swr_generate_gs_key(struct swr_jit_gs_key &key,
- struct swr_context *ctx,
- swr_geometry_shader *swr_gs)
-{
- memset((void*)&key, 0, sizeof(key));
-
- struct tgsi_shader_info *pPrevShader = nullptr;
-
- if (ctx->tes) {
- pPrevShader = &ctx->tes->info.base;
- } else {
- pPrevShader = &ctx->vs->info.base;
- }
-
- memcpy(&key.vs_output_semantic_name,
- &pPrevShader->output_semantic_name,
- sizeof(key.vs_output_semantic_name));
- memcpy(&key.vs_output_semantic_idx,
- &pPrevShader->output_semantic_index,
- sizeof(key.vs_output_semantic_idx));
-
- swr_generate_sampler_key(swr_gs->info, ctx, PIPE_SHADER_GEOMETRY, key);
-}
-
-void
-swr_generate_tcs_key(struct swr_jit_tcs_key &key,
- struct swr_context *ctx,
- swr_tess_control_shader *swr_tcs)
-{
- memset((void*)&key, 0, sizeof(key));
-
- struct tgsi_shader_info *pPrevShader = &ctx->vs->info.base;
-
- memcpy(&key.vs_output_semantic_name,
- &pPrevShader->output_semantic_name,
- sizeof(key.vs_output_semantic_name));
- memcpy(&key.vs_output_semantic_idx,
- &pPrevShader->output_semantic_index,
- sizeof(key.vs_output_semantic_idx));
-
- key.clip_plane_mask =
- swr_tcs->info.base.clipdist_writemask ?
- swr_tcs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
- ctx->rasterizer->clip_plane_enable;
-
- swr_generate_sampler_key(swr_tcs->info, ctx, PIPE_SHADER_TESS_CTRL, key);
-}
-
-void
-swr_generate_tes_key(struct swr_jit_tes_key &key,
- struct swr_context *ctx,
- swr_tess_evaluation_shader *swr_tes)
-{
- memset((void*)&key, 0, sizeof(key));
-
- struct tgsi_shader_info *pPrevShader = nullptr;
-
- if (ctx->tcs) {
- pPrevShader = &ctx->tcs->info.base;
- }
- else {
- pPrevShader = &ctx->vs->info.base;
- }
-
- SWR_ASSERT(pPrevShader != nullptr, "TES: No TCS or VS defined");
-
- memcpy(&key.prev_output_semantic_name,
- &pPrevShader->output_semantic_name,
- sizeof(key.prev_output_semantic_name));
- memcpy(&key.prev_output_semantic_idx,
- &pPrevShader->output_semantic_index,
- sizeof(key.prev_output_semantic_idx));
-
- key.clip_plane_mask =
- swr_tes->info.base.clipdist_writemask ?
- swr_tes->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
- ctx->rasterizer->clip_plane_enable;
-
- swr_generate_sampler_key(swr_tes->info, ctx, PIPE_SHADER_TESS_EVAL, key);
-}
-
-struct BuilderSWR : public Builder {
- BuilderSWR(JitManager *pJitMgr, const char *pName)
- : Builder(pJitMgr)
- {
- pJitMgr->SetupNewModule();
- gallivm = gallivm_create(pName, wrap(&JM()->mContext), NULL);
- pJitMgr->mpCurrentModule = unwrap(gallivm->module);
- }
-
- ~BuilderSWR() {
- gallivm_free_ir(gallivm);
- }
-
- void WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput,
- unsigned slot, unsigned channel);
-
- struct gallivm_state *gallivm;
- PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key);
- PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key);
- PFN_GS_FUNC CompileGS(struct swr_context *ctx, swr_jit_gs_key &key);
- PFN_TCS_FUNC CompileTCS(struct swr_context *ctx, swr_jit_tcs_key &key);
- PFN_TES_FUNC CompileTES(struct swr_context *ctx, swr_jit_tes_key &key);
-
- // GS-specific emit functions
- LLVMValueRef
- swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
- struct lp_build_context * bld,
- boolean is_vindex_indirect,
- LLVMValueRef vertex_index,
- boolean is_aindex_indirect,
- LLVMValueRef attrib_index,
- LLVMValueRef swizzle_index);
- void
- swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
- struct lp_build_context * bld,
- LLVMValueRef (*outputs)[4],
- LLVMValueRef emitted_vertices_vec,
- LLVMValueRef stream_id);
-
- void
- swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
- struct lp_build_context * bld,
- LLVMValueRef total_emitted_vertices_vec_ptr,
- LLVMValueRef verts_per_prim_vec,
- LLVMValueRef emitted_prims_vec,
- LLVMValueRef mask_vec);
-
- void
- swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
- LLVMValueRef total_emitted_vertices_vec,
- LLVMValueRef emitted_prims_vec, unsigned stream);
-
- // TCS-specific emit functions
- void swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld);
- void swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld);
-
- LLVMValueRef
- swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface,
- struct lp_build_tgsi_context * bld_base,
- boolean is_vindex_indirect,
- LLVMValueRef vertex_index,
- boolean is_aindex_indirect,
- LLVMValueRef attrib_index,
- LLVMValueRef swizzle_index);
-
- LLVMValueRef
- swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface,
- struct lp_build_tgsi_context * bld_base,
- boolean is_vindex_indirect,
- LLVMValueRef vertex_index,
- boolean is_aindex_indirect,
- LLVMValueRef attrib_index,
- LLVMValueRef swizzle_index,
- uint32_t name);
-
- void
- swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
- struct lp_build_tgsi_context * bld_base,
- unsigned name,
- boolean is_vindex_indirect,
- LLVMValueRef vertex_index,
- boolean is_aindex_indirect,
- LLVMValueRef attrib_index,
- LLVMValueRef swizzle_index,
- LLVMValueRef value,
- LLVMValueRef mask_vec);
-
- // Barrier implementation (available only in TCS)
- void
- swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface,
- struct lp_build_tgsi_context *bld_base);
-
- // TES-specific emit functions
- LLVMValueRef
- swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface,
- struct lp_build_tgsi_context * bld_base,
- boolean is_vindex_indirect,
- LLVMValueRef vertex_index,
- boolean is_aindex_indirect,
- LLVMValueRef attrib_index,
- LLVMValueRef swizzle_index);
-
- LLVMValueRef
- swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
- struct lp_build_tgsi_context * bld_base,
- boolean is_aindex_indirect,
- LLVMValueRef attrib_index,
- LLVMValueRef swizzle_index);
-};
-
-// Geometry-shader code-generation state handed to gallivm.
-// The static swr_gs_llvm_* trampolines cast the lp_build_gs_iface pointer they
-// receive back to this struct, so `base` has to remain the first member.
-struct swr_gs_llvm_iface {
- struct lp_build_gs_iface base;
- // Shader info; emit_vertex reads output_semantic_name[] and writes_position.
- struct tgsi_shader_info *info;
-
- // Builder the trampolines forward every callback to.
- BuilderSWR *pBuilder;
-
- // GS context value; accessed through the SWR_GS_CONTEXT_* member indices.
- Value *pGsCtx;
- // emit_vertex / end_primitive branch on pGsState->outputTopology.
- SWR_GS_STATE *pGsState;
- // Number of output attributes emit_vertex writes per vertex.
- uint32_t num_outputs;
- // Read by end_primitive.
- uint32_t num_verts_per_prim;
-
- // Lookup table: shader attribute index -> input vertex slot (see fetch_input).
- Value *pVtxAttribMap;
-};
-
-// Tessellation-control-shader code-generation state handed to gallivm.
-// The static swr_tcs_llvm_* trampolines cast lp_build_tcs_iface pointers back
-// to this struct, so `base` has to remain the first member.
-struct swr_tcs_llvm_iface {
- struct lp_build_tcs_iface base;
- struct tgsi_shader_info *info;
-
- // Builder the trampolines forward every callback to.
- BuilderSWR *pBuilder;
-
- // TCS context value; accessed through SWR_HS_CONTEXT_vert / SWR_HS_CONTEXT_pCPout.
- Value *pTcsCtx;
- // NOTE(review): not referenced by the emit functions visible in this part of the file.
- SWR_TS_STATE *pTsState;
-
- // Upper bound of the invocation loop (compared against loop_var).
- uint32_t output_vertices;
-
- // Stack slot holding the SIMD invocation counter; created in emit_prologue.
- LLVMValueRef loop_var;
-
- // Lookup tables: shader attribute index -> storage slot, for vertex inputs,
- // per-vertex outputs and per-patch outputs respectively.
- Value *pVtxAttribMap;
- Value *pVtxOutputAttribMap;
- Value *pPatchOutputAttribMap;
-};
-
-// Tessellation-evaluation-shader code-generation state handed to gallivm.
-// The static swr_tes_llvm_* trampolines cast lp_build_tes_iface pointers back
-// to this struct, so `base` has to remain the first member.
-struct swr_tes_llvm_iface {
- struct lp_build_tes_iface base;
- struct tgsi_shader_info *info;
-
- // Builder the trampolines forward every callback to.
- BuilderSWR *pBuilder;
-
- // TES context value; fetch_patch_input loads SWR_DS_CONTEXT_pCpIn from it.
- Value *pTesCtx;
- // NOTE(review): not referenced by the emit functions visible in this part of the file.
- SWR_TS_STATE *pTsState;
-
- // NOTE(review): presumably the number of TES outputs - confirm against the compile path.
- uint32_t num_outputs;
-
- // Lookup tables: shader attribute index -> storage slot, for per-vertex and
- // per-patch inputs respectively.
- Value *pVtxAttribMap;
- Value *pPatchAttribMap;
-};
-
-// trampoline functions so we can use the builder llvm construction methods
-static LLVMValueRef
-swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
-                        struct lp_build_context * bld,
-                        boolean is_vindex_indirect,
-                        LLVMValueRef vertex_index,
-                        boolean is_aindex_indirect,
-                        LLVMValueRef attrib_index,
-                        LLVMValueRef swizzle_index)
-{
-   /* gs_iface is the `base` member of our wrapper; recover the wrapper to
-    * reach the builder that owns the real implementation. */
-   BuilderSWR *builder = ((swr_gs_llvm_iface*)gs_iface)->pBuilder;
-
-   return builder->swr_gs_llvm_fetch_input(gs_iface, bld,
-                                           is_vindex_indirect, vertex_index,
-                                           is_aindex_indirect, attrib_index,
-                                           swizzle_index);
-}
-
-static void
-swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
-                        struct lp_build_context * bld,
-                        LLVMValueRef (*outputs)[4],
-                        LLVMValueRef emitted_vertices_vec,
-                        LLVMValueRef mask_vec,
-                        LLVMValueRef stream_id)
-{
-   /* mask_vec is accepted for the callback signature but is not forwarded:
-    * the builder method takes no mask argument. */
-   BuilderSWR *builder = ((swr_gs_llvm_iface*)gs_base)->pBuilder;
-
-   builder->swr_gs_llvm_emit_vertex(gs_base, bld, outputs,
-                                    emitted_vertices_vec, stream_id);
-}
-
-static void
-swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
-                          struct lp_build_context * bld,
-                          LLVMValueRef total_emitted_vertices_vec_ptr,
-                          LLVMValueRef verts_per_prim_vec,
-                          LLVMValueRef emitted_prims_vec,
-                          LLVMValueRef mask_vec, unsigned stream_id)
-{
-   /* stream_id is part of the callback signature only; the builder method
-    * does not take it. */
-   BuilderSWR *builder = ((swr_gs_llvm_iface*)gs_base)->pBuilder;
-
-   builder->swr_gs_llvm_end_primitive(gs_base, bld,
-                                      total_emitted_vertices_vec_ptr,
-                                      verts_per_prim_vec,
-                                      emitted_prims_vec,
-                                      mask_vec);
-}
-
-static void
-swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
-                     LLVMValueRef total_emitted_vertices_vec,
-                     LLVMValueRef emitted_prims_vec, unsigned stream)
-{
-   /* Recover the wrapper around gs_base and delegate to its builder. */
-   BuilderSWR *builder = ((swr_gs_llvm_iface*)gs_base)->pBuilder;
-
-   builder->swr_gs_llvm_epilogue(gs_base, total_emitted_vertices_vec,
-                                 emitted_prims_vec, stream);
-}
-
-static LLVMValueRef
-swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface,
-                         struct lp_build_context * bld,
-                         boolean is_vindex_indirect,
-                         LLVMValueRef vertex_index,
-                         boolean is_aindex_indirect,
-                         LLVMValueRef attrib_index,
-                         boolean is_sindex_indirect,
-                         LLVMValueRef swizzle_index)
-{
-   /* is_sindex_indirect is not forwarded; the builder method has no such
-    * parameter. The build context is handed on as a TGSI context. */
-   BuilderSWR *builder = ((swr_tcs_llvm_iface*)tcs_iface)->pBuilder;
-   struct lp_build_tgsi_context *tgsi = (struct lp_build_tgsi_context*)bld;
-
-   return builder->swr_tcs_llvm_fetch_input(tcs_iface, tgsi,
-                                            is_vindex_indirect, vertex_index,
-                                            is_aindex_indirect, attrib_index,
-                                            swizzle_index);
-}
-
-static LLVMValueRef
-swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface,
-                          struct lp_build_context * bld,
-                          boolean is_vindex_indirect,
-                          LLVMValueRef vertex_index,
-                          boolean is_aindex_indirect,
-                          LLVMValueRef attrib_index,
-                          boolean is_sindex_indirect,
-                          LLVMValueRef swizzle_index,
-                          uint32_t name)
-{
-   /* is_sindex_indirect is not forwarded; the builder method has no such
-    * parameter. The build context is handed on as a TGSI context. */
-   BuilderSWR *builder = ((swr_tcs_llvm_iface*)tcs_iface)->pBuilder;
-   struct lp_build_tgsi_context *tgsi = (struct lp_build_tgsi_context*)bld;
-
-   return builder->swr_tcs_llvm_fetch_output(tcs_iface, tgsi,
-                                             is_vindex_indirect, vertex_index,
-                                             is_aindex_indirect, attrib_index,
-                                             swizzle_index,
-                                             name);
-}
-
-
-static void
-swr_tcs_llvm_emit_prologue(struct lp_build_context* bld)
-{
-   /* The incoming context is cast back to the SoA TGSI context, whose
-    * tcs_iface member leads to our wrapper and its builder. */
-   lp_build_tgsi_soa_context *soa = (lp_build_tgsi_soa_context*)bld;
-   BuilderSWR *builder = ((swr_tcs_llvm_iface*)soa->tcs_iface)->pBuilder;
-
-   builder->swr_tcs_llvm_emit_prologue(soa);
-}
-
-static void
-swr_tcs_llvm_emit_epilogue(struct lp_build_context* bld)
-{
-   /* The incoming context is cast back to the SoA TGSI context, whose
-    * tcs_iface member leads to our wrapper and its builder. */
-   lp_build_tgsi_soa_context *soa = (lp_build_tgsi_soa_context*)bld;
-   BuilderSWR *builder = ((swr_tcs_llvm_iface*)soa->tcs_iface)->pBuilder;
-
-   builder->swr_tcs_llvm_emit_epilogue(soa);
-}
-
-static
-void swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
-                               struct lp_build_context * bld,
-                               unsigned name,
-                               boolean is_vindex_indirect,
-                               LLVMValueRef vertex_index,
-                               boolean is_aindex_indirect,
-                               LLVMValueRef attrib_index,
-                               boolean is_sindex_indirect,
-                               LLVMValueRef swizzle_index,
-                               LLVMValueRef value,
-                               LLVMValueRef mask_vec)
-{
-   /* is_sindex_indirect is not forwarded; the builder method has no such
-    * parameter. The build context is handed on as a TGSI context. */
-   BuilderSWR *builder = ((swr_tcs_llvm_iface*)tcs_iface)->pBuilder;
-   struct lp_build_tgsi_context *tgsi = (struct lp_build_tgsi_context*)bld;
-
-   builder->swr_tcs_llvm_store_output(tcs_iface, tgsi, name,
-                                      is_vindex_indirect, vertex_index,
-                                      is_aindex_indirect, attrib_index,
-                                      swizzle_index,
-                                      value,
-                                      mask_vec);
-}
-
-
-static
-void swr_tcs_llvm_emit_barrier(struct lp_build_context *bld)
-{
-   /* The incoming context is cast back to the SoA TGSI context; the builder
-    * wants the interface pointer plus the embedded TGSI base context. */
-   lp_build_tgsi_soa_context *soa = (lp_build_tgsi_soa_context*)bld;
-   BuilderSWR *builder = ((swr_tcs_llvm_iface*)soa->tcs_iface)->pBuilder;
-
-   builder->swr_tcs_llvm_emit_barrier(soa->tcs_iface, &soa->bld_base);
-}
-
-
-static LLVMValueRef
-swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface,
-                             struct lp_build_context * bld,
-                             boolean is_vindex_indirect,
-                             LLVMValueRef vertex_index,
-                             boolean is_aindex_indirect,
-                             LLVMValueRef attrib_index,
-                             boolean is_sindex_indirect,
-                             LLVMValueRef swizzle_index)
-{
-   /* is_sindex_indirect is not forwarded; the builder method has no such
-    * parameter. The build context is handed on as a TGSI context. */
-   BuilderSWR *builder = ((swr_tes_llvm_iface*)tes_iface)->pBuilder;
-   struct lp_build_tgsi_context *tgsi = (struct lp_build_tgsi_context*)bld;
-
-   return builder->swr_tes_llvm_fetch_vtx_input(tes_iface, tgsi,
-                                                is_vindex_indirect, vertex_index,
-                                                is_aindex_indirect, attrib_index,
-                                                swizzle_index);
-}
-
-static LLVMValueRef
-swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
-                               struct lp_build_context * bld,
-                               boolean is_aindex_indirect,
-                               LLVMValueRef attrib_index,
-                               LLVMValueRef swizzle_index)
-{
-   /* Recover the wrapper around tes_iface and delegate to its builder; the
-    * build context is handed on as a TGSI context. */
-   BuilderSWR *builder = ((swr_tes_llvm_iface*)tes_iface)->pBuilder;
-   struct lp_build_tgsi_context *tgsi = (struct lp_build_tgsi_context*)bld;
-
-   return builder->swr_tes_llvm_fetch_patch_input(tes_iface, tgsi,
-                                                  is_aindex_indirect,
-                                                  attrib_index,
-                                                  swizzle_index);
-}
-
-// Emit IR that fetches one channel (swizzle_index) of a GS input attribute.
-// The attribute index is translated through pVtxAttribMap, and the element
-// read from pVerts is addressed as (vertex * inputVertStride + slot).
-// With a direct index the whole SIMD value is loaded at once; with an
-// indirect vertex and/or attribute index the result is assembled lane by lane.
-LLVMValueRef
-BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
-                                    struct lp_build_context * bld,
-                                    boolean is_vindex_indirect,
-                                    LLVMValueRef vertex_index,
-                                    boolean is_aindex_indirect,
-                                    LLVMValueRef attrib_index,
-                                    LLVMValueRef swizzle_index)
-{
-   swr_gs_llvm_iface *gs = (swr_gs_llvm_iface*)gs_iface;
-   Value *vtxIdx = unwrap(vertex_index);
-   Value *attrIdx = unwrap(attrib_index);
-
-   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
-   // Direct addressing: a single SIMD-wide load is enough.
-   if (!is_vindex_indirect && !is_aindex_indirect) {
-      Value *slot = LOAD(GEP(gs->pVtxAttribMap, {C(0), attrIdx}));
-
-      Value *pVerts = LOAD(gs->pGsCtx, {0, SWR_GS_CONTEXT_pVerts});
-      Value *vertStride = LOAD(gs->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride});
-
-      Value *vecIdx = ADD(MUL(vtxIdx, vertStride), slot);
-
-      return wrap(LOAD(GEP(pVerts, {vecIdx, unwrap(swizzle_index)})));
-   }
-
-   // Indirect addressing: resolve indices per lane, load the SIMD value the
-   // lane points at and keep only that lane's element.
-   Value *gathered = unwrap(bld->zero);
-   struct lp_type type = bld->type;
-
-   for (int lane = 0; lane < type.length; lane++) {
-      Value *laneVtx = vtxIdx;
-      Value *laneAttr = attrIdx;
-
-      if (is_vindex_indirect) {
-         laneVtx = VEXTRACT(vtxIdx, C(lane));
-      }
-      if (is_aindex_indirect) {
-         laneAttr = VEXTRACT(attrIdx, C(lane));
-      }
-
-      Value *slot = LOAD(GEP(gs->pVtxAttribMap, {C(0), laneAttr}));
-
-      Value *pVerts = LOAD(gs->pGsCtx, {0, SWR_GS_CONTEXT_pVerts});
-      Value *vertStride = LOAD(gs->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride});
-
-      Value *vecIdx = ADD(MUL(laneVtx, vertStride), slot);
-      Value *simdVal = LOAD(GEP(pVerts, {vecIdx, unwrap(swizzle_index)}));
-
-      Value *laneVal = VEXTRACT(simdVal, C(lane));
-      gathered = VINSERT(gathered, laneVal, C(lane));
-   }
-
-   return wrap(gathered);
-}
-
-// GS output stream layout
-#define VERTEX_COUNT_SIZE 32
-#define CONTROL_HEADER_SIZE (8*32)
-
-// Emit IR that writes the current output vertex of every SIMD lane into that
-// lane's GS output stream and, for point-list output, records the vertex's
-// stream id in the control header.
-//
-// outputs              - per-attribute array of 4 channel pointers; each is
-//                        loaded to obtain the SIMD value for that channel
-// emitted_vertices_vec - per-lane count of vertices emitted so far; used as
-//                        the index of the vertex being written
-// stream_id            - stream id value (only consulted for point lists)
-void
-BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
- struct lp_build_context * bld,
- LLVMValueRef (*outputs)[4],
- LLVMValueRef emitted_vertices_vec,
- LLVMValueRef stream_id)
-{
- swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
-
- IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
- // Vertex data starts after the vertex-count field and the control header.
- const uint32_t headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE;
- const uint32_t attribSize = 4 * sizeof(float);
- const uint32_t vertSize = attribSize * SWR_VTX_NUM_SLOTS;
- // Per-lane byte offset of the vertex being written, relative to the vertex data.
- Value *pVertexOffset = MUL(unwrap(emitted_vertices_vec), VIMMED1(vertSize));
-
- // Lane mask from the GS context, narrowed to one bit per lane.
- Value *vMask = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_mask});
- Value *vMask1 = TRUNC(vMask, getVectorType(mInt1Ty, mVWidth));
-
- Value *pStack = STACKSAVE();
- Value *pTmpPtr = ALLOCA(mFP32Ty, C(4)); // used for dummy write for lane masking
-
- for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) {
- // Map the TGSI output semantic to the slot (and, for system-generated
- // values, the channel) it occupies in the output vertex.
- uint32_t attribSlot = attrib;
- uint32_t sgvChannel = 0;
- if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
- attribSlot = VERTEX_SGV_SLOT;
- sgvChannel = VERTEX_SGV_POINT_SIZE_COMP;
- } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER) {
- attribSlot = VERTEX_SGV_SLOT;
- sgvChannel = VERTEX_SGV_RTAI_COMP;
- } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_VIEWPORT_INDEX) {
- attribSlot = VERTEX_SGV_SLOT;
- sgvChannel = VERTEX_SGV_VAI_COMP;
- } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
- attribSlot = VERTEX_POSITION_SLOT;
- } else {
- attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
- // When the shader writes position, generic attributes are shifted down by one slot.
- if (iface->info->writes_position) {
- attribSlot--;
- }
- }
-
- Value *pOutputOffset = ADD(pVertexOffset, VIMMED1(headerSize + attribSize * attribSlot)); // + sgvChannel ?
-
- for (uint32_t lane = 0; lane < mVWidth; ++lane) {
- Value *pLaneOffset = VEXTRACT(pOutputOffset, C(lane));
- Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
- Value *pStreamOffset = GEP(pStream, pLaneOffset);
- pStreamOffset = BITCAST(pStreamOffset, mFP32PtrTy);
-
- // Inactive lanes are redirected to the scratch slot so the stores below are harmless.
- Value *pLaneMask = VEXTRACT(vMask1, C(lane));
- pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr);
-
- for (uint32_t channel = 0; channel < 4; ++channel) {
- Value *vData;
-
- // System-generated values always come from channel 0 of the shader output.
- if (attribSlot == VERTEX_SGV_SLOT)
- vData = LOAD(unwrap(outputs[attrib][0]));
- else
- vData = LOAD(unwrap(outputs[attrib][channel]));
-
- // For the SGV slot only the channel belonging to this semantic is written.
- if (attribSlot != VERTEX_SGV_SLOT ||
- sgvChannel == channel) {
- vData = VEXTRACT(vData, C(lane));
- STORE(vData, pStreamOffset);
- }
- pStreamOffset = GEP(pStreamOffset, C(1));
- }
- }
- }
-
- /* When the output type is not points, the geometry shader may not
- * output data to multiple streams. So early exit here.
- */
- if(iface->pGsState->outputTopology != TOP_POINT_LIST) {
- STACKRESTORE(pStack);
- return;
- }
-
- // Info about stream id for each vertex
- // is coded in 2 bits (4 vert per byte "box"):
- // ----------------- ----------------- ----
- // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |...
- // ----------------- ----------------- ----
-
- // Calculate where need to put stream id for current vert
- // in 1 byte "box".
- // NOTE(review): the shift is not reduced modulo 8 before being truncated to
- // i8 and used below - confirm this is correct for vertex indices >= 4.
- Value *pShiftControl = MUL(unwrap(emitted_vertices_vec), VIMMED1(2));
-
- // Calculate in which box put stream id for current vert.
- Value *pOffsetControl = LSHR(unwrap(emitted_vertices_vec), VIMMED1(2));
-
- // Skip count header
- Value *pStreamIdOffset = ADD(pOffsetControl, VIMMED1(VERTEX_COUNT_SIZE));
-
- // NOTE(review): unlike the attribute loop above, this loop does not apply
- // the lane mask, and it reads the stream id from lane 0 for every lane -
- // presumably the id is uniform; confirm.
- for (uint32_t lane = 0; lane < mVWidth; ++lane) {
- Value *pShift = TRUNC(VEXTRACT(pShiftControl, C(lane)), mInt8Ty);
- Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
-
- Value *pStreamOffset = GEP(pStream, VEXTRACT(pStreamIdOffset, C(lane)));
-
- // Just make sure that not overflow max - stream id = (0,1,2,3)
- Value *vVal = TRUNC(AND(VEXTRACT(unwrap(stream_id), C(0)), C(0x3)), mInt8Ty);
-
- // Shift it to correct position in byte "box"
- vVal = SHL(vVal, pShift);
-
- // Info about other vertices can be already stored
- // so we need to read and add bits from current vert info.
- Value *storedValue = LOAD(pStreamOffset);
- vVal = OR(storedValue, vVal);
- STORE(vVal, pStreamOffset);
- }
-
- STACKRESTORE(pStack);
-}
-
-// Emit IR that records an end-of-primitive marker for every active lane.
-//
-// One bit per emitted vertex is kept in the lane's output stream, starting
-// VERTEX_COUNT_SIZE bytes in. The bit belonging to the last emitted vertex
-// (total_emitted_vertices_vec - 1) is set for each lane that is enabled in
-// mask_vec and has emitted at least one vertex since the previous cut
-// (verts_per_prim_vec != 0).
-//
-// bld and emitted_prims_vec are part of the callback signature but are not
-// needed to compute the marker position.
-void
-BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
-                                      struct lp_build_context * bld,
-                                      LLVMValueRef total_emitted_vertices_vec,
-                                      LLVMValueRef verts_per_prim_vec,
-                                      LLVMValueRef emitted_prims_vec,
-                                      LLVMValueRef mask_vec)
-{
-   swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
-
-   /* When the output type is points, the geometry shader may output data
-    * to multiple streams, and end_primitive has no effect. Info about
-    * stream id for vertices is stored into the same place in memory where
-    * end primitive info is stored so early exit in this case.
-    */
-   if (iface->pGsState->outputTopology == TOP_POINT_LIST) {
-      return;
-   }
-
-   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
-   // A lane takes part only if it is enabled and has an open primitive.
-   // The vector types follow the SIMD width (mVWidth) rather than a
-   // hard-coded 8 so they always match the operands and the lane loop below.
-   Value *mask = unwrap(mask_vec);
-   Value *cmpMask = VMASK(ICMP_NE(unwrap(verts_per_prim_vec), VIMMED1(0)));
-   mask = AND(mask, cmpMask);
-   Value *vMask1 = TRUNC(mask, getVectorType(mInt1Ty, mVWidth));
-
-   // Index of the last emitted vertex selects the bit to set:
-   // byte = index / 8 (after the count header), bit = index % 8.
-   Value *vCount = SUB(unwrap(total_emitted_vertices_vec), VIMMED1(1));
-   Value *vOffset = ADD(UDIV(vCount, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE));
-   Value *vValue = SHL(VIMMED1(1), UREM(vCount, VIMMED1(8)));
-
-   vValue = TRUNC(vValue, getVectorType(mInt8Ty, mVWidth));
-
-   Value *pStack = STACKSAVE();
-   Value *pTmpPtr = ALLOCA(mInt8Ty, C(4)); // used for dummy read/write for lane masking
-
-   for (uint32_t lane = 0; lane < mVWidth; ++lane) {
-      Value *vLaneOffset = VEXTRACT(vOffset, C(lane));
-      Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
-      Value *pStreamOffset = GEP(pStream, vLaneOffset);
-
-      // Inactive lanes read-modify-write the scratch slot instead of the stream.
-      Value *pLaneMask = VEXTRACT(vMask1, C(lane));
-      pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr);
-
-      Value *vVal = LOAD(pStreamOffset);
-      vVal = OR(vVal, VEXTRACT(vValue, C(lane)));
-      STORE(vVal, pStreamOffset);
-   }
-
-   STACKRESTORE(pStack);
-}
-
-// Emit IR that writes each lane's total emitted-vertex count into the first
-// DWORD of that lane's output stream.
-void
-BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
-                                 LLVMValueRef total_emitted_vertices_vec,
-                                 LLVMValueRef emitted_prims_vec, unsigned stream)
-{
-   swr_gs_llvm_iface *gs = (swr_gs_llvm_iface*)gs_base;
-
-   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
-   uint32_t lane = 0;
-   while (lane < mVWidth) {
-      // The stream pointer is reinterpreted so the count lands at offset 0.
-      Value *pStreamBase = LOAD(gs->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
-      Value *pCountSlot = BITCAST(pStreamBase, mInt32PtrTy);
-      Value *laneTotal = VEXTRACT(unwrap(total_emitted_vertices_vec), C(lane));
-      STORE(laneTotal, pCountSlot);
-      ++lane;
-   }
-}
-
-// Emit the start of the TCS invocation loop: allocate a SIMD counter,
-// initialise it to zero, open a gallivm exec-mask loop and expose the counter
-// value to the shader as its invocation id.
-void
-BuilderSWR::swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld)
-{
- swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface;
-
- // Loop counter lives in a stack slot so it survives across loop iterations.
- Value* loop_var = ALLOCA(mSimdInt32Ty);
- STORE(VBROADCAST(C(0)), loop_var);
-
- // Remembered on the interface; emit_epilogue / emit_barrier update it.
- iface->loop_var = wrap(loop_var);
-
- lp_exec_bgnloop(&bld->exec_mask, true);
-
- // Re-sync our builder with gallivm's current block before emitting
- // further IR (presumably lp_exec_bgnloop moved it - confirm).
- IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
- bld->system_values.invocation_id = wrap((LOAD(unwrap(iface->loop_var))));
-
- if (verbose_tcs_shader_loop) {
- lp_build_print_value(gallivm, "Prologue LOOP Iteration BEGIN:", bld->system_values.invocation_id);
- }
-
-}
-
-// Emit the end of the TCS invocation loop: increment the counter created by
-// emit_prologue, break out once it reaches output_vertices, and close the
-// gallivm exec-mask loop.
-void
-BuilderSWR::swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld)
-{
- swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface;
-
- struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
-
- // loop_var += 1 (all lanes)
- STORE(ADD(LOAD(unwrap(iface->loop_var)), VBROADCAST(C(1))), unwrap(iface->loop_var));
- if (verbose_tcs_shader_loop) {
- lp_build_print_value(gallivm, "Epilogue LOOP: ", wrap(LOAD(unwrap(iface->loop_var))));
- }
-
- // Conditional break: lanes whose counter is >= output_vertices leave the loop.
- LLVMValueRef tmp = lp_build_cmp(uint_bld, PIPE_FUNC_GEQUAL, wrap(LOAD(unwrap(iface->loop_var))),
- wrap(VBROADCAST(C(iface->output_vertices))));
- lp_exec_mask_cond_push(&bld->exec_mask, tmp);
- lp_exec_break(&bld->exec_mask, &bld->bld_base.pc, false);
- lp_exec_mask_cond_pop(&bld->exec_mask);
- lp_exec_endloop(bld->bld_base.base.gallivm, &bld->exec_mask);
-}
-
-// Emit IR that fetches one channel (swizzle_index) of a TCS per-vertex input.
-//
-// The shader attribute index is translated through pVtxAttribMap and the value
-// is read from the SWR_HS_CONTEXT_vert array of the TCS context. With direct
-// indices the whole SIMD value is loaded at once; when the vertex and/or
-// attribute index is indirect the result is gathered lane by lane.
-// When verbose_tcs_shader_in is set, printf calls tracing the fetch are
-// emitted into the generated code as well.
-LLVMValueRef
-BuilderSWR::swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface,
-                                     struct lp_build_tgsi_context * bld_base,
-                                     boolean is_vindex_indirect,
-                                     LLVMValueRef vertex_index,
-                                     boolean is_aindex_indirect,
-                                     LLVMValueRef attrib_index,
-                                     LLVMValueRef swizzle_index)
-{
-   swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
-
-   Value *vert_index = unwrap(vertex_index);
-   Value *attr_index = unwrap(attrib_index);
-
-   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
-   if (verbose_tcs_shader_in) {
-      lp_build_printf(gallivm, "[TCS IN][VTX] ======================================\n");
-      lp_build_print_value(gallivm, "[TCS IN][VTX] vertex_index: ", vertex_index);
-      lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index);
-      lp_build_printf(gallivm, "[TCS IN][VTX] --------------------------------------\n");
-   }
-
-   Value *res = unwrap(bld_base->base.zero);
-   if (is_vindex_indirect || is_aindex_indirect) {
-      int i;
-      struct lp_type type = bld_base->base.type;
-
-      for (i = 0; i < type.length; i++) {
-         Value *vert_chan_index = vert_index;
-         Value *attr_chan_index = attr_index;
-
-         if (is_vindex_indirect) {
-            vert_chan_index = VEXTRACT(vert_index, C(i));
-         }
-         if (is_aindex_indirect) {
-            attr_chan_index = VEXTRACT(attr_index, C(i));
-         }
-
-         Value *attrib =
-            LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index}));
-
-         // The trailing C(i) selects this lane's element of the SIMD channel.
-         Value *pBase = GEP(iface->pTcsCtx,
-                            { C(0), C(SWR_HS_CONTEXT_vert), vert_chan_index,
-                              C(simdvertex_attrib), attrib, unwrap(swizzle_index), C(i) });
-
-         Value *val = LOAD(pBase);
-
-         if (verbose_tcs_shader_in) {
-            lp_build_print_value(gallivm, "[TCS IN][VTX] vert_chan_index: ", wrap(vert_chan_index));
-            lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index);
-            // Print the per-lane index that was actually used for the lookup.
-            lp_build_print_value(gallivm, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_chan_index));
-            lp_build_print_value(gallivm, "[TCS IN][VTX] attrib read from map: ", wrap(attrib));
-            lp_build_print_value(gallivm, "[TCS IN][VTX] swizzle_index: ", swizzle_index);
-            lp_build_print_value(gallivm, "[TCS IN][VTX] Loaded: ", wrap(val));
-         }
-         res = VINSERT(res, val, C(i));
-      }
-   } else {
-      Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));
-
-      Value *pBase = GEP(iface->pTcsCtx,
-                         { C(0), C(SWR_HS_CONTEXT_vert), vert_index,
-                           C(simdvertex_attrib), attrib, unwrap(swizzle_index) });
-
-      res = LOAD(pBase);
-
-      if (verbose_tcs_shader_in) {
-         lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index);
-         lp_build_print_value(gallivm, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index));
-         lp_build_print_value(gallivm, "[TCS IN][VTX] attrib read from map: ", wrap(attrib));
-         lp_build_print_value(gallivm, "[TCS IN][VTX] swizzle_index: ", swizzle_index);
-         lp_build_print_value(gallivm, "[TCS IN][VTX] Loaded: ", wrap(res));
-      }
-   }
-   if (verbose_tcs_shader_in) {
-      lp_build_print_value(gallivm, "[TCS IN][VTX] returning: ", wrap(res));
-   }
-   return wrap(res);
-}
-
-// Emit IR that reads back one channel (swizzle_index) of a TCS output.
-//
-// Each SIMD lane reads from its own element of the pCPout array (the debug
-// output below calls the lane the "patch-id"). `name` selects where the value
-// lives: tessellation factors, per-patch data, or a per-control-point
-// attribute. The per-lane scalars are assembled into the returned SIMD value.
-LLVMValueRef
-BuilderSWR::swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface,
- struct lp_build_tgsi_context * bld_base,
- boolean is_vindex_indirect,
- LLVMValueRef vertex_index,
- boolean is_aindex_indirect,
- LLVMValueRef attrib_index,
- LLVMValueRef swizzle_index,
- uint32_t name)
-{
- swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
-
- Value *vert_index = unwrap(vertex_index);
- Value *attr_index = unwrap(attrib_index);
-
- IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
- if (verbose_tcs_shader_in) {
- lp_build_print_value(gallivm, "[TCS INOUT] Vertex index: ", vertex_index);
- lp_build_print_value(gallivm, "[TCS INOUT] Attrib index: ", wrap(attr_index));
- lp_build_print_value(gallivm, "[TCS INOUT] Swizzle index: ", swizzle_index);
- }
-
- Value* res = unwrap(bld_base->base.zero);
-
- for (uint32_t lane = 0; lane < mVWidth; lane++) {
- // Output patch belonging to this lane.
- Value* p1 = LOAD(iface->pTcsCtx, {0, SWR_HS_CONTEXT_pCPout});
- Value* pCpOut = GEP(p1, {lane});
-
- Value *vert_chan_index = vert_index;
- Value *attr_chan_index = attr_index;
-
- // Indirect indices are SIMD vectors; pick this lane's element.
- if (is_vindex_indirect) {
- vert_chan_index = VEXTRACT(vert_index, C(lane));
- if (verbose_tcs_shader_in) {
- lp_build_print_value(gallivm, "[TCS INOUT] Extracted vertex index: ", wrap(vert_chan_index));
- }
- }
-
- if (is_aindex_indirect) {
- attr_chan_index = VEXTRACT(attr_index, C(lane));
- if (verbose_tcs_shader_in) {
- lp_build_print_value(gallivm, "[TCS INOUT] Extracted attrib index: ", wrap(attr_chan_index));
- }
- }
-
- if (name == TGSI_SEMANTIC_TESSOUTER || name == TGSI_SEMANTIC_TESSINNER) {
- // Tessellation factors: outer/inner array indexed directly by swizzle_index.
- Value* tessFactors = GEP(pCpOut, {(uint32_t)0, ScalarPatch_tessFactors});
- Value* tessFactorArray = nullptr;
- if (name == TGSI_SEMANTIC_TESSOUTER) {
- tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors});
- } else {
- tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors});
- }
- Value* tessFactor = GEP(tessFactorArray, {C(0), unwrap(swizzle_index)});
- res = VINSERT(res, LOAD(tessFactor), C(lane));
- if (verbose_tcs_shader_in) {
- lp_build_print_value(gallivm, "[TCS INOUT][FACTOR] lane (patch-id): ", wrap(C(lane)));
- lp_build_print_value(gallivm, "[TCS INOUT][FACTOR] loaded value: ", wrap(res));
- }
- } else if (name == TGSI_SEMANTIC_PATCH) {
- // Per-patch attribute: slot comes from the patch output attribute map.
- Value* attr_index_from_map = LOAD(GEP(iface->pPatchOutputAttribMap, {C(0), attr_chan_index}));
- Value* attr_value = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attr_index_from_map, unwrap(swizzle_index)});
- res = VINSERT(res, LOAD(attr_value), C(lane));
- if (verbose_tcs_shader_in) {
- lp_build_print_value(gallivm, "[TCS INOUT][PATCH] attr index loaded from map: ", wrap(attr_index_from_map));
- lp_build_print_value(gallivm, "[TCS INOUT][PATCH] lane (patch-id): ", wrap(C(lane)));
- lp_build_print_value(gallivm, "[TCS INOUT][PATCH] loaded value: ", wrap(res));
- }
- } else {
- // Generic attribute
- Value *attrib =
- LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_chan_index}));
- if (verbose_tcs_shader_in) {
- lp_build_print_value(gallivm, "[TCS INOUT][VTX] Attrib index from map: ", wrap(attrib));
- }
- // Addressed as cp[vertex].attrib[slot][channel] within this lane's patch.
- Value* attr_chan = GEP(pCpOut, {C(0), C(ScalarPatch_cp), vert_chan_index,
- C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)});
-
- res = VINSERT(res, LOAD(attr_chan), C(lane));
- if (verbose_tcs_shader_in) {
- lp_build_print_value(gallivm, "[TCS INOUT][VTX] loaded value: ", wrap(res));
- }
- }
- }
-
- return wrap(res);
-}
-
-// Emit IR that stores one channel (swizzle_index) of a TCS output.
-//
-// Each SIMD lane writes into its own element of the pCPout array (the debug
-// output calls the lane the "patch-id"). `name` selects the destination:
-//   - TGSI_SEMANTIC_TESSOUTER / TESSINNER: the patch's tessellation factors
-//   - TGSI_SEMANTIC_PATCH: per-patch data, slot taken from pPatchOutputAttribMap
-//   - anything else: the control point selected by the current invocation id,
-//     slot taken from pVtxOutputAttribMap
-// When mask_vec is non-NULL a lane's store only takes effect if its mask bit
-// is set; otherwise the previously stored value is written back.
-// Indirect vertex/attribute indices are resolved from lane 0 only.
-// When verbose_tcs_shader_out is set, printf calls tracing the store are
-// emitted into the generated code as well.
-void
-BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
-                                      struct lp_build_tgsi_context *bld_base,
-                                      unsigned name,
-                                      boolean is_vindex_indirect,
-                                      LLVMValueRef vertex_index,
-                                      boolean is_aindex_indirect,
-                                      LLVMValueRef attrib_index,
-                                      LLVMValueRef swizzle_index,
-                                      LLVMValueRef value,
-                                      LLVMValueRef mask_vec)
-{
-   swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
-   struct lp_build_tgsi_soa_context* bld = (struct lp_build_tgsi_soa_context*)bld_base;
-
-   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
-   if (verbose_tcs_shader_out) {
-      lp_build_printf(gallivm, "[TCS OUT] =============================================\n");
-      lp_build_print_value(gallivm, "[TCS OUT] Store mask: ", bld->exec_mask.exec_mask);
-      lp_build_print_value(gallivm, "[TCS OUT] Store value: ", value);
-   }
-
-   Value *vert_index = unwrap(vertex_index);
-   Value *attr_index = unwrap(attrib_index);
-
-   if (verbose_tcs_shader_out) {
-      lp_build_print_value(gallivm, "[TCS OUT] Vertex index: ", vertex_index);
-      lp_build_print_value(gallivm, "[TCS OUT] Attrib index: ", wrap(attr_index));
-      lp_build_print_value(gallivm, "[TCS OUT] Swizzle index: ", swizzle_index);
-   }
-
-   if (is_vindex_indirect) {
-      vert_index = VEXTRACT(vert_index, C(0));
-      if (verbose_tcs_shader_out) {
-         // Show the extracted scalar, not the original index vector.
-         lp_build_print_value(gallivm, "[TCS OUT] Extracted vertex index: ", wrap(vert_index));
-      }
-   }
-
-   if (is_aindex_indirect) {
-      attr_index = VEXTRACT(attr_index, C(0));
-      if (verbose_tcs_shader_out) {
-         lp_build_print_value(gallivm, "[TCS OUT] Extracted attrib index: ", wrap(attr_index));
-      }
-   }
-
-   if (verbose_tcs_shader_out) {
-      if (bld->exec_mask.has_mask) {
-         lp_build_print_value(gallivm, "[TCS OUT] Exec mask: ", bld->exec_mask.exec_mask);
-      }
-      else {
-         lp_build_printf(gallivm, "[TCS OUT] has no mask\n");
-      }
-   }
-   for (uint32_t lane = 0; lane < mVWidth; lane++) {
-      // Output patch belonging to this lane.
-      Value* p1 = LOAD(iface->pTcsCtx, {0, SWR_HS_CONTEXT_pCPout});
-      Value* pCpOut = GEP(p1, {lane});
-
-      if (name == TGSI_SEMANTIC_TESSOUTER || name == TGSI_SEMANTIC_TESSINNER) {
-         Value* tessFactors = GEP(pCpOut, {(uint32_t)0, ScalarPatch_tessFactors});
-         Value* tessFactorArray = nullptr;
-         if (name == TGSI_SEMANTIC_TESSOUTER) {
-            tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors});
-         } else {
-            tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors});
-         }
-         Value* tessFactor = GEP(tessFactorArray, {C(0), unwrap(swizzle_index)});
-         Value* valueToStore = VEXTRACT(unwrap(value), C(lane));
-         valueToStore = BITCAST(valueToStore, mFP32Ty);
-         if (mask_vec) {
-            // Masked-off lanes rewrite the value that is already there.
-            Value *originalVal = LOAD(tessFactor);
-            Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty);
-            valueToStore = SELECT(vMask, valueToStore, originalVal);
-         }
-         STORE(valueToStore, tessFactor);
-         if (verbose_tcs_shader_out)
-         {
-            // mask_vec is optional; only print it when the caller supplied one.
-            if (mask_vec) {
-               lp_build_print_value(gallivm, "[TCS OUT][FACTOR] Mask_vec mask: ", mask_vec);
-            }
-            lp_build_print_value(gallivm, "[TCS OUT][FACTOR] Stored value: ", wrap(valueToStore));
-         }
-      } else if (name == TGSI_SEMANTIC_PATCH) {
-         Value* attrib = LOAD(GEP(iface->pPatchOutputAttribMap, {C(0), attr_index}));
-         if (verbose_tcs_shader_out) {
-            lp_build_print_value(gallivm, "[TCS OUT][PATCH] vert_index: ", wrap(vert_index));
-            lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr_index: ", wrap(attr_index));
-            lp_build_print_value(gallivm, "[TCS OUT][PATCH] vert_index_indirect: ", wrap(C(is_vindex_indirect)));
-            lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr_index_indirect: ", wrap(C(is_aindex_indirect)));
-            lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr index loaded from map: ", wrap(attrib));
-         }
-         Value* attr = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attrib});
-         Value* value_to_store = VEXTRACT(unwrap(value), C(lane));
-         if (verbose_tcs_shader_out) {
-            lp_build_print_value(gallivm, "[TCS OUT][PATCH] lane (patch-id): ", wrap(C(lane)));
-            lp_build_print_value(gallivm, "[TCS OUT][PATCH] value to store: ", value);
-            lp_build_print_value(gallivm, "[TCS OUT][PATCH] per-patch value to store: ", wrap(value_to_store));
-            lp_build_print_value(gallivm, "[TCS OUT][PATCH] chan_index: ", swizzle_index);
-         }
-         value_to_store = BITCAST(value_to_store, mFP32Ty);
-         if (mask_vec) {
-            // Masked-off lanes rewrite the value that is already there.
-            Value *originalVal = LOADV(attr, {C(0), unwrap(swizzle_index)});
-            Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty);
-            value_to_store = SELECT(vMask, value_to_store, originalVal);
-            if (verbose_tcs_shader_out) {
-               lp_build_print_value(gallivm, "[TCS OUT][PATCH] store mask: ", mask_vec);
-               lp_build_print_value(gallivm, "[TCS OUT][PATCH] loaded original value: ", wrap(originalVal));
-               lp_build_print_value(gallivm, "[TCS OUT][PATCH] vMask: ", wrap(vMask));
-               lp_build_print_value(gallivm, "[TCS OUT][PATCH] selected value to store: ", wrap(value_to_store));
-            }
-         }
-         STOREV(value_to_store, attr, {C(0), unwrap(swizzle_index)});
-         if (verbose_tcs_shader_out) {
-            lp_build_print_value(gallivm, "[TCS OUT][PATCH] stored value: ", wrap(value_to_store));
-         }
-      } else {
-         Value* value_to_store = VEXTRACT(unwrap(value), C(lane));
-         Value* attrib = LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_index}));
-
-         if (verbose_tcs_shader_out) {
-            lp_build_printf(gallivm, "[TCS OUT] Writting attribute\n");
-            lp_build_print_value(gallivm, "[TCS OUT][VTX] invocation_id: ", bld->system_values.invocation_id);
-            lp_build_print_value(gallivm, "[TCS OUT][VTX] attribIndex: ", wrap(attr_index));
-            lp_build_print_value(gallivm, "[TCS OUT][VTX] attrib read from map: ", wrap(attrib));
-            lp_build_print_value(gallivm, "[TCS OUT][VTX] chan_index: ", swizzle_index);
-            lp_build_print_value(gallivm, "[TCS OUT][VTX] value: ", value);
-            lp_build_print_value(gallivm, "[TCS OUT][VTX] value_to_store: ", wrap(value_to_store));
-         }
-
-         // The control point written is the one for the current invocation
-         // (lane 0 of invocation_id), not vertex_index.
-         Value* attr_chan = GEP(pCpOut, {C(0), C(ScalarPatch_cp),
-                                         VEXTRACT(unwrap(bld->system_values.invocation_id), C(0)),
-                                         C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)});
-
-         // Mask output values if needed
-         value_to_store = BITCAST(value_to_store, mFP32Ty);
-         if (mask_vec) {
-            Value *originalVal = LOAD(attr_chan);
-            Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty);
-            value_to_store = SELECT(vMask, value_to_store, originalVal);
-         }
-         STORE(value_to_store, attr_chan);
-         if (verbose_tcs_shader_out) {
-            // mask_vec is optional; only print it when the caller supplied one.
-            if (mask_vec) {
-               lp_build_print_value(gallivm, "[TCS OUT][VTX] Mask_vec mask: ", mask_vec);
-            }
-            lp_build_print_value(gallivm, "[TCS OUT][VTX] stored: ", wrap(value_to_store));
-         }
-      }
-   }
-}
-
-// Emit a TCS barrier by splitting the invocation loop in two: the current
-// loop is finished exactly as emit_epilogue does (increment, conditional
-// break, endloop), then the counter is reset to zero and a fresh loop is
-// opened with invocation_id reloaded from the counter.
-void
-BuilderSWR::swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface,
- struct lp_build_tgsi_context *bld_base)
-{
- swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
- struct lp_build_tgsi_soa_context* bld = (struct lp_build_tgsi_soa_context*)bld_base;
-
- // NOTE(review): this passes the counter's stack slot (iface->loop_var), not
- // its loaded value, and the message contains a literal "%d" - confirm how
- // lp_build_print_value treats both.
- if (verbose_tcs_shader_loop) {
- lp_build_print_value(gallivm, "Barrier LOOP: Iteration %d END\n", iface->loop_var);
- }
-
- struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
-
- // Close the loop that was running before the barrier.
- STORE(ADD(LOAD(unwrap(iface->loop_var)), VBROADCAST(C(1))), unwrap(iface->loop_var));
-
- LLVMValueRef tmp = lp_build_cmp(uint_bld, PIPE_FUNC_GEQUAL, wrap(LOAD(unwrap(iface->loop_var))),
- wrap(VBROADCAST(C(iface->output_vertices))));
-
- lp_exec_mask_cond_push(&bld->exec_mask, tmp);
- lp_exec_break(&bld->exec_mask, &bld->bld_base.pc, false);
- lp_exec_mask_cond_pop(&bld->exec_mask);
- lp_exec_endloop(bld->bld_base.base.gallivm, &bld->exec_mask);
-
- // Re-sync our builder with gallivm's current block after the loop helpers.
- IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
- // Restart the invocation counter and open the post-barrier loop.
- STORE(VBROADCAST(C(0)), unwrap(iface->loop_var));
- lp_exec_bgnloop(&bld->exec_mask, true);
-
- IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
- bld->system_values.invocation_id = wrap((LOAD(unwrap(iface->loop_var))));
-
- if (verbose_tcs_shader_loop) {
- lp_build_print_value(gallivm, "Barrier LOOP: Iteration BEGIN: ", iface->loop_var);
- lp_build_print_value(gallivm, "Barrier LOOP: InvocationId: \n", bld->system_values.invocation_id);
- }
-}
-
-
-/**
- * TES callback: fetch one channel of a per-patch input attribute.
- *
- * The attribute index is translated through iface->pPatchAttribMap and the
- * value is read from the patch data reached through the DS context's pCpIn
- * pointer.  With an indirect attribute index the lookup is done once per
- * vector element and the results are inserted into the result vector;
- * otherwise a single value is loaded and broadcast.
- *
- * \param tes_iface           TES interface; cast to swr_tes_llvm_iface
- * \param bld_base            TGSI build context (supplies the vector type and zero)
- * \param is_aindex_indirect  true when attrib_index is a per-element vector
- * \param attrib_index        attribute index (scalar or vector)
- * \param swizzle_index       channel to read
- * \return the fetched value as a vector
- */
-LLVMValueRef
-BuilderSWR::swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
-                                           struct lp_build_tgsi_context * bld_base,
-                                           boolean is_aindex_indirect,
-                                           LLVMValueRef attrib_index,
-                                           LLVMValueRef swizzle_index)
-{
-   swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface;
-   Value *attrIdx = unwrap(attrib_index);
-   Value *result = unwrap(bld_base->base.zero);
-
-   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
-   if (verbose_shader)
-      lp_build_printf(gallivm, "[TES IN][PATCH] --------------------------------------\n");
-
-   // Emits the map lookup and the load for one scalar map index; both the
-   // indirect and the direct path use exactly this sequence.
-   auto fetchOne = [&](Value *mapIndex) -> Value * {
-      Value *mapped = LOAD(GEP(iface->pPatchAttribMap, {C(0), mapIndex}));
-
-      Value *pPatchIn = LOAD(iface->pTesCtx, {(uint32_t)0, SWR_DS_CONTEXT_pCpIn}, "pCpIn");
-      Value *pData = GEP(pPatchIn, {(uint32_t)0, ScalarPatch_patchData});
-      Value *pAttribs = GEP(pData, {(uint32_t)0, ScalarCPoint_attrib});
-      Value *loaded = LOADV(pAttribs, {C(0), mapped, unwrap(swizzle_index)});
-
-      if (verbose_shader) {
-         lp_build_print_value(gallivm, "[TES IN][PATCH] attrib_index: ", attrib_index);
-         lp_build_print_value(gallivm, "[TES IN][PATCH] attr_chan_index: ", wrap(mapIndex));
-         lp_build_print_value(gallivm, "[TES IN][PATCH] attrib read from map: ", wrap(mapped));
-         lp_build_print_value(gallivm, "[TES IN][PATCH] swizzle_index: ", swizzle_index);
-         lp_build_print_value(gallivm, "[TES IN][PATCH] Loaded: ", wrap(loaded));
-      }
-      return loaded;
-   };
-
-   if (!is_aindex_indirect) {
-      result = VBROADCAST(fetchOne(attrIdx));
-   } else {
-      struct lp_type type = bld_base->base.type;
-
-      for (int elem = 0; elem < type.length; elem++) {
-         Value *elemIdx = VEXTRACT(attrIdx, C(elem));
-         Value *elemVal = fetchOne(elemIdx);
-         result = VINSERT(result, elemVal, C(elem));
-      }
-   }
-
-   if (verbose_shader)
-      lp_build_print_value(gallivm, "[TES IN][PATCH] returning: ", wrap(result));
-
-   return wrap(result);
-}
-
-
-
-/**
- * TES callback: fetch one channel of a per-vertex (control point) input.
- *
- * The attribute index is translated through iface->pVtxAttribMap; the value
- * is read from the control point array reached through the DS context's
- * pCpIn pointer.  When either index is indirect the lookup is done once per
- * vector element and the results are inserted into the result vector;
- * otherwise a single value is loaded and broadcast.
- *
- * The per-element debug print labelled "attr_chan_index" now prints the
- * extracted per-element index; it previously printed the whole attribute
- * index vector, unlike the matching print in the patch-input fetch.
- *
- * \param tes_iface           TES interface; cast to swr_tes_llvm_iface
- * \param bld_base            TGSI build context (supplies the vector type and zero)
- * \param is_vindex_indirect  true when vertex_index is a per-element vector
- * \param vertex_index        control point index (scalar or vector)
- * \param is_aindex_indirect  true when attrib_index is a per-element vector
- * \param attrib_index        attribute index (scalar or vector)
- * \param swizzle_index       channel to read
- * \return the fetched value as a vector
- */
-LLVMValueRef
-BuilderSWR::swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface,
-                                         struct lp_build_tgsi_context * bld_base,
-                                         boolean is_vindex_indirect,
-                                         LLVMValueRef vertex_index,
-                                         boolean is_aindex_indirect,
-                                         LLVMValueRef attrib_index,
-                                         LLVMValueRef swizzle_index)
-{
-   swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface;
-   Value *vert_index = unwrap(vertex_index);
-   Value *attr_index = unwrap(attrib_index);
-
-   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
-   if (verbose_shader) {
-      lp_build_printf(gallivm, "[TES IN][VTX] --------------------------------------\n");
-   }
-
-   // Emits the map lookup and the load for one (vertex, attribute) index
-   // pair; shared by the per-element and the direct path.
-   auto fetchOne = [&](Value *vert_chan_index, Value *attr_chan_index) -> Value * {
-      Value *attrib =
-         LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index}));
-
-      Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn");
-      Value *pCp = GEP(pCpIn, {0, ScalarPatch_cp});
-      Value *pVertex = GEP(pCp, {(Value*)C(0), vert_chan_index});
-      Value *pAttrTab = GEP(pVertex, {uint32_t(0), uint32_t(0)});
-      Value *pAttr = GEP(pAttrTab, {(Value*)C(0), attrib});
-      Value *Val = LOADV(pAttr, {C(0), unwrap(swizzle_index)});
-      if (verbose_shader) {
-         lp_build_print_value(gallivm, "[TES IN][VTX] attrib_index: ", attrib_index);
-         lp_build_print_value(gallivm, "[TES IN][VTX] attr_chan_index: ", wrap(attr_chan_index));
-         lp_build_print_value(gallivm, "[TES IN][VTX] attrib read from map: ", wrap(attrib));
-         lp_build_print_value(gallivm, "[TES IN][VTX] swizzle_index: ", swizzle_index);
-         lp_build_print_value(gallivm, "[TES IN][VTX] Loaded: ", wrap(Val));
-      }
-      return Val;
-   };
-
-   Value *res = unwrap(bld_base->base.zero);
-   if (is_vindex_indirect || is_aindex_indirect) {
-      struct lp_type type = bld_base->base.type;
-
-      for (int i = 0; i < type.length; i++) {
-         // Only an indirect index is a vector that needs per-element extraction.
-         Value *vert_chan_index = vert_index;
-         Value *attr_chan_index = attr_index;
-
-         if (is_vindex_indirect) {
-            vert_chan_index = VEXTRACT(vert_index, C(i));
-         }
-         if (is_aindex_indirect) {
-            attr_chan_index = VEXTRACT(attr_index, C(i));
-         }
-
-         Value *Val = fetchOne(vert_chan_index, attr_chan_index);
-         res = VINSERT(res, Val, C(i));
-      }
-   } else {
-      res = VBROADCAST(fetchOne(vert_index, attr_index));
-   }
-   if (verbose_shader) {
-      lp_build_print_value(gallivm, "[TES IN][VTX] returning: ", wrap(res));
-   }
-   return wrap(res);
-}
-
-
-
-/**
- * Build and JIT-compile the geometry shader bound in ctx->gs.
- *
- * Fills ctx->gs->gsState from the shader's TGSI info, creates an LLVM
- * function named "GS" taking pointers to the draw context, worker data and
- * the GS context, translates the TGSI tokens with lp_build_tgsi_soa, then
- * verifies and compiles the module.  Marks the JIT manager's module as
- * finalized before returning.
- *
- * \param ctx  SWR context; ctx->gs and ctx->vs are read, ctx->gs->gsState is overwritten
- * \param key  shader variant key; key.sampler is used to create the sampler
- * \return pointer to the JIT-compiled function (asserted to be non-NULL)
- */
-PFN_GS_FUNC
-BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
-{
- SWR_GS_STATE *pGS = &ctx->gs->gsState;
- struct tgsi_shader_info *info = &ctx->gs->info.base;
-
- memset(pGS, 0, sizeof(*pGS)); // reset the whole GS state before filling it in
-
- pGS->gsEnable = true;
-
- pGS->numInputAttribs = (VERTEX_ATTRIB_START_SLOT - VERTEX_POSITION_SLOT) + info->num_inputs;
- pGS->outputTopology =
- swr_convert_prim_topology(info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM], 0);
-
- /* It's +1 because emit_vertex in swr is always called exactly one time more
- * than max_vertices passed in Geometry Shader. We need to allocate more memory
- * to avoid crash/memory overwritten.
- */
- pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] + 1;
- pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS];
-
- // If point primitive then assume to use multiple streams
- if(pGS->outputTopology == TOP_POINT_LIST) {
- pGS->isSingleStream = false;
- } else {
- pGS->isSingleStream = true;
- pGS->singleStreamID = 0;
- }
-
- pGS->vertexAttribOffset = VERTEX_POSITION_SLOT;
- pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset;
- pGS->outputVertexSize = SWR_VTX_NUM_SLOTS;
- pGS->controlDataSize = 8; // GS outputs max of 8 32B units
- pGS->controlDataOffset = VERTEX_COUNT_SIZE;
- pGS->outputVertexOffset = pGS->controlDataOffset + CONTROL_HEADER_SIZE;
-
- pGS->allocationSize =
- VERTEX_COUNT_SIZE + // vertex count
- CONTROL_HEADER_SIZE + // control header
- (SWR_VTX_NUM_SLOTS * 16) * // sizeof vertex
- pGS->maxNumVerts; // num verts
-
- struct swr_geometry_shader *gs = ctx->gs;
-
- LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
- LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
-
- memset(outputs, 0, sizeof(outputs));
-
- AttrBuilder attrBuilder;
- attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
-
- std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
- PointerType::get(mInt8Ty, 0),
- PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
- FunctionType *vsFuncType = // NOTE(review): named vsFuncType, but this is the GS function type
- FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false);
-
- // create the LLVM function that will hold the geometry shader
- auto pFunction = Function::Create(vsFuncType,
- GlobalValue::ExternalLinkage,
- "GS",
- JM()->mpCurrentModule);
-#if LLVM_VERSION_MAJOR < 5
- AttributeSet attrSet = AttributeSet::get(
- JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
- pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
-#else
- pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
-#endif
-
- BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
- IRB()->SetInsertPoint(block);
- LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
- // Name the three function arguments in declaration order
- auto argitr = pFunction->arg_begin();
- Value *hPrivateData = &*argitr++;
- hPrivateData->setName("hPrivateData");
- Value *pWorkerData = &*argitr++;
- pWorkerData->setName("pWorkerData");
- Value *pGsCtx = &*argitr++;
- pGsCtx->setName("gsCtx");
-
- Value *consts_ptr =
- GEP(hPrivateData, {C(0), C(swr_draw_context_constantGS)});
- consts_ptr->setName("gs_constants");
- Value *const_sizes_ptr =
- GEP(hPrivateData, {0, swr_draw_context_num_constantsGS});
- const_sizes_ptr->setName("num_gs_constants");
-
- struct lp_build_sampler_soa *sampler =
- swr_sampler_soa_create(key.sampler, PIPE_SHADER_GEOMETRY);
- assert(sampler != nullptr);
-
- struct lp_bld_tgsi_system_values system_values;
- memset(&system_values, 0, sizeof(system_values));
- system_values.prim_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_PrimitiveID}));
- system_values.invocation_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_InstanceID}));
- // Map each GS input slot to a vertex slot located via the VS outputs; handed to gs_iface.pVtxAttribMap below
- std::vector<Constant*> mapConstants; // NOTE(review): only appended to in this function, never read
- Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
- for (unsigned slot = 0; slot < info->num_inputs; slot++) {
- ubyte semantic_name = info->input_semantic_name[slot];
- ubyte semantic_idx = info->input_semantic_index[slot];
-
- unsigned vs_slot = locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
- assert(vs_slot < PIPE_MAX_SHADER_OUTPUTS);
-
- vs_slot += VERTEX_ATTRIB_START_SLOT;
- // when the VS's first output is POSITION, the slot is shifted down by one
- if (ctx->vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
- vs_slot--;
- // a POSITION input always maps to VERTEX_POSITION_SLOT
- if (semantic_name == TGSI_SEMANTIC_POSITION)
- vs_slot = VERTEX_POSITION_SLOT;
-
- STORE(C(vs_slot), vtxAttribMap, {0, slot});
- mapConstants.push_back(C(vs_slot));
- }
- // The initial execution mask is loaded from the GS context
- struct lp_build_mask_context mask;
- Value *mask_val = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_mask}, "gsMask");
- lp_build_mask_begin(&mask, gallivm,
- lp_type_float_vec(32, 32 * 8), wrap(mask_val));
-
- // zero out cut buffer so we can load/modify/store bits
- for (uint32_t lane = 0; lane < mVWidth; ++lane)
- {
- Value* pStream = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
-#if LLVM_VERSION_MAJOR >= 10
- MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, MaybeAlign(sizeof(float) * KNOB_SIMD_WIDTH));
-#else
- MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, sizeof(float) * KNOB_SIMD_WIDTH);
-#endif
- }
- // Interface struct passed to lp_build_tgsi_soa through params.gs_iface
- struct swr_gs_llvm_iface gs_iface;
- gs_iface.base.fetch_input = ::swr_gs_llvm_fetch_input;
- gs_iface.base.emit_vertex = ::swr_gs_llvm_emit_vertex;
- gs_iface.base.end_primitive = ::swr_gs_llvm_end_primitive;
- gs_iface.base.gs_epilogue = ::swr_gs_llvm_epilogue;
- gs_iface.pBuilder = this;
- gs_iface.pGsCtx = pGsCtx;
- gs_iface.pGsState = pGS;
- gs_iface.num_outputs = gs->info.base.num_outputs;
- gs_iface.num_verts_per_prim =
- u_vertices_per_prim((pipe_prim_type)info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
- gs_iface.info = info;
- gs_iface.pVtxAttribMap = vtxAttribMap;
-
- struct lp_build_tgsi_params params;
- memset(&params, 0, sizeof(params));
- params.type = lp_type_float_vec(32, 32 * 8);
- params.mask = & mask;
- params.consts_ptr = wrap(consts_ptr);
- params.const_sizes_ptr = wrap(const_sizes_ptr);
- params.system_values = &system_values;
- params.inputs = inputs;
- params.context_ptr = wrap(hPrivateData);
- params.sampler = sampler;
- params.info = &gs->info.base;
- params.gs_iface = &gs_iface.base;
- // Translate the TGSI tokens into LLVM IR
- lp_build_tgsi_soa(gallivm,
- gs->pipe.tokens,
- &params,
- outputs);
-
- lp_build_mask_end(&mask);
-
- sampler->destroy(sampler);
- // Continue emitting at the block gallivm ended on
- IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
- RET_VOID();
- // Verify the generated IR, compile the module and fetch the JIT-compiled entry point
- gallivm_verify_function(gallivm, wrap(pFunction));
- gallivm_compile_module(gallivm);
-
- PFN_GS_FUNC pFunc =
- (PFN_GS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
-
- debug_printf("geom shader %p\n", pFunc);
- assert(pFunc && "Error: GeomShader = NULL");
-
- JM()->mIsModuleFinalized = true;
-
- return pFunc;
-}
-/**
- * Build and JIT-compile the tessellation evaluation shader bound in ctx->tes.
- *
- * Clears and fills ctx->tsState (domain, partitioning, output topology and
- * attribute offsets) from the shader's TGSI properties, creates an LLVM
- * function named "TES" taking pointers to the draw context, worker data and
- * the DS context, sets up the tess coordinate / primitive id / tess level
- * system values, translates the TGSI tokens with lp_build_tgsi_soa and
- * stores every written output channel to the DS context's output buffer.
- * The module is then verified and compiled, and the JIT manager's module is
- * marked as finalized.
- *
- * \param ctx  SWR context; ctx->tes, ctx->tcs and ctx->vs are read, ctx->tsState is overwritten
- * \param key  shader variant key; key.sampler is used to create the sampler
- * \return pointer to the JIT-compiled function (asserted to be non-NULL)
- */
-PFN_TES_FUNC
-BuilderSWR::CompileTES(struct swr_context *ctx, swr_jit_tes_key &key)
-{
- SWR_TS_STATE *pTS = &ctx->tsState;
- struct tgsi_shader_info *info = &ctx->tes->info.base;
-
- // tessellation is enabled if TES is present
- // clear tessellation state here then
- memset(pTS, 0, sizeof(*pTS));
-
- pTS->tsEnable = true;
-
- unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE];
- unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
- bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
- bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE];
- SWR_TS_DOMAIN type = SWR_TS_ISOLINE;
- SWR_TS_PARTITIONING partitioning = SWR_TS_EVEN_FRACTIONAL;
- SWR_TS_OUTPUT_TOPOLOGY topology = SWR_TS_OUTPUT_POINT;
- PRIMITIVE_TOPOLOGY postDSTopology = TOP_POINT_LIST;
-
- // TESS_TODO: move this to helper functions to improve readability
- switch (tes_prim_mode) {
- case PIPE_PRIM_LINES:
- type = SWR_TS_ISOLINE;
- postDSTopology = TOP_LINE_LIST;
- break;
- case PIPE_PRIM_TRIANGLES:
- type = SWR_TS_TRI;
- postDSTopology = TOP_TRIANGLE_LIST;
- break;
- case PIPE_PRIM_QUADS:
- type = SWR_TS_QUAD;
- // See OpenGL spec - quads are tessellated into triangles
- postDSTopology = TOP_TRIANGLE_LIST;
- break;
- default:
- assert(0);
- }
-
- switch (tes_spacing) {
- case PIPE_TESS_SPACING_FRACTIONAL_ODD:
- partitioning = SWR_TS_ODD_FRACTIONAL;
- break;
- case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
- partitioning = SWR_TS_EVEN_FRACTIONAL;
- break;
- case PIPE_TESS_SPACING_EQUAL:
- partitioning = SWR_TS_INTEGER;
- break;
- default:
- assert(0);
- }
- // Point mode overrides both topologies; otherwise lines, then CW/CCW triangles
- if (tes_point_mode) {
- topology = SWR_TS_OUTPUT_POINT;
- postDSTopology = TOP_POINT_LIST;
- }
- else if (tes_prim_mode == PIPE_PRIM_LINES) {
- topology = SWR_TS_OUTPUT_LINE;
- }
- else if (tes_vertex_order_cw) {
- topology = SWR_TS_OUTPUT_TRI_CW;
- }
- else {
- topology = SWR_TS_OUTPUT_TRI_CCW;
- }
-
- pTS->domain = type;
- pTS->tsOutputTopology = topology;
- pTS->partitioning = partitioning;
- pTS->numDsOutputAttribs = info->num_outputs;
- pTS->postDSTopology = postDSTopology;
-
- pTS->dsAllocationSize = SWR_VTX_NUM_SLOTS * MAX_NUM_VERTS_PER_PRIM;
- pTS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
- pTS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
- pTS->dsOutVtxAttribOffset = VERTEX_ATTRIB_START_SLOT;
-
- struct swr_tess_evaluation_shader *tes = ctx->tes;
-
- LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
- LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
-
- memset(outputs, 0, sizeof(outputs));
-
- AttrBuilder attrBuilder;
- attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
-
- std::vector<Type *> tesArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
- PointerType::get(mInt8Ty, 0),
- PointerType::get(Gen_SWR_DS_CONTEXT(JM()), 0)};
- FunctionType *tesFuncType =
- FunctionType::get(Type::getVoidTy(JM()->mContext), tesArgs, false);
-
- // create the LLVM function that will hold the tessellation evaluation shader
- auto pFunction = Function::Create(tesFuncType,
- GlobalValue::ExternalLinkage,
- "TES",
- JM()->mpCurrentModule);
-
-#if LLVM_VERSION_MAJOR < 5
- AttributeSet attrSet = AttributeSet::get(
- JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
- pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
-#else
- pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
-#endif
-
- BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
- IRB()->SetInsertPoint(block);
- LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
- // Name the three function arguments in declaration order
- auto argitr = pFunction->arg_begin();
- Value *hPrivateData = &*argitr++;
- hPrivateData->setName("hPrivateData");
- Value *pWorkerData = &*argitr++;
- pWorkerData->setName("pWorkerData");
- Value *pTesCtx = &*argitr++;
- pTesCtx->setName("tesCtx");
-
- Value *consts_ptr =
- GEP(hPrivateData, {C(0), C(swr_draw_context_constantTES)});
- consts_ptr->setName("tes_constants");
- Value *const_sizes_ptr =
- GEP(hPrivateData, {0, swr_draw_context_num_constantsTES});
- const_sizes_ptr->setName("num_tes_constants");
-
- struct lp_build_sampler_soa *sampler =
- swr_sampler_soa_create(key.sampler, PIPE_SHADER_TESS_EVAL);
- assert(sampler != nullptr);
-
- struct lp_bld_tgsi_system_values system_values;
- memset(&system_values, 0, sizeof(system_values));
-
- // Load and calculate system values
- // Tessellation coordinates (gl_TessCoord)
- Value *vecOffset = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorOffset}, "vecOffset");
- Value *vecStride = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorStride}, "vecStride");
- Value *vecIndex = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorOffset}); // NOTE(review): loads the same vectorOffset field as vecOffset above
-
- Value* tess_coord = ALLOCA(ArrayType::get(mSimdFP32Ty, 3));
- // U and V are read from the domain buffers at vecIndex; W is derived below
- Value *tessCoordU = LOADV(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pDomainU}), {vecIndex}, "tessCoordU");
- STORE(tessCoordU, tess_coord, {0, 0});
- Value *tessCoordV = LOADV(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pDomainV}), {vecIndex}, "tessCoordV");
- STORE(tessCoordV, tess_coord, {0, 1});
- Value *tessCoordW = FSUB(FSUB(VIMMED1(1.0f), tessCoordU), tessCoordV, "tessCoordW"); // W = 1 - U - V
- STORE(tessCoordW, tess_coord, {0, 2});
- system_values.tess_coord = wrap(tess_coord);
-
- // Primitive ID
- system_values.prim_id = wrap(VBROADCAST(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_PrimitiveID}), "PrimitiveID"));
-
- // Tessellation factors
- Value* pPatch = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pCpIn});
- Value* pTessFactors = GEP(pPatch, {C(0), C(ScalarPatch_tessFactors)});
-
- assert(SWR_NUM_OUTER_TESS_FACTORS == 4);
- Value* sys_value_outer_factors = UndefValue::get(getVectorType(mFP32Ty, 4));
- for (unsigned i = 0; i < SWR_NUM_OUTER_TESS_FACTORS; i++) {
- Value* v = LOAD(pTessFactors, {0, SWR_TESSELLATION_FACTORS_OuterTessFactors, i});
- sys_value_outer_factors = VINSERT(sys_value_outer_factors, v, i, "gl_TessLevelOuter");
- }
- system_values.tess_outer = wrap(sys_value_outer_factors);
-
- assert(SWR_NUM_INNER_TESS_FACTORS == 2);
- Value* sys_value_inner_factors = UndefValue::get(getVectorType(mFP32Ty, 4)); // only the first SWR_NUM_INNER_TESS_FACTORS elements are written below; the rest stay undef
- for (unsigned i = 0; i < SWR_NUM_INNER_TESS_FACTORS; i++) {
- Value* v = LOAD(pTessFactors, {0, SWR_TESSELLATION_FACTORS_InnerTessFactors, i});
- sys_value_inner_factors = VINSERT(sys_value_inner_factors, v, i, "gl_TessLevelInner");
- }
- system_values.tess_inner = wrap(sys_value_inner_factors);
-
- if (verbose_shader)
- {
- lp_build_print_value(gallivm, "tess_coord = ", system_values.tess_coord);
- }
- // The previous stage is the TCS when one is bound, otherwise the VS
- struct tgsi_shader_info *pPrevShader = nullptr;
-
- if (ctx->tcs) {
- pPrevShader = &ctx->tcs->info.base;
- }
- else {
- pPrevShader = &ctx->vs->info.base;
- }
-
- // Count the previous stage's outputs by semantic class.
- // NOTE(review): perPatchAttrs and genericAttrs are counted but not read later in this function.
- unsigned perPatchAttrs = 0;
- unsigned genericAttrs = 0;
- unsigned tessLevelAttrs = 0;
- unsigned sgvAttrs = 0;
- for (unsigned slot = 0; slot < pPrevShader->num_outputs; slot++) {
- switch (pPrevShader->output_semantic_name[slot]) {
- case TGSI_SEMANTIC_PATCH:
- perPatchAttrs++;
- break;
- case TGSI_SEMANTIC_GENERIC:
- genericAttrs++;
- break;
- case TGSI_SEMANTIC_TESSINNER:
- case TGSI_SEMANTIC_TESSOUTER:
- tessLevelAttrs++;
- break;
- case TGSI_SEMANTIC_POSITION:
- case TGSI_SEMANTIC_CLIPDIST:
- case TGSI_SEMANTIC_PSIZE:
- sgvAttrs++;
- break;
- default:
- assert(!"Unknown semantic input in TES");
- }
- }
- // Build the per-vertex and per-patch input slot maps handed to tes_iface below
- std::vector<Constant *> mapConstants; // NOTE(review): only appended to in this function, never read
- Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
- Value *patchAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
- for (unsigned slot = 0; slot < info->num_inputs; slot++) {
- ubyte semantic_name = info->input_semantic_name[slot];
- ubyte semantic_idx = info->input_semantic_index[slot];
-
- // Where in TCS output is my attribute?
- // TESS_TODO: revisit after implement pass-through TCS
- unsigned tcs_slot = locate_linkage(semantic_name, semantic_idx, pPrevShader);
- assert(tcs_slot < PIPE_MAX_SHADER_OUTPUTS);
-
- // Skip tessellation levels - these go to the tessellator, not TES
- switch (semantic_name) {
- case TGSI_SEMANTIC_GENERIC:
- tcs_slot = tcs_slot + VERTEX_ATTRIB_START_SLOT - sgvAttrs - tessLevelAttrs;
- break;
- case TGSI_SEMANTIC_PATCH:
- tcs_slot = semantic_idx;
- break;
- case TGSI_SEMANTIC_POSITION:
- tcs_slot = VERTEX_POSITION_SLOT;
- break;
- case TGSI_SEMANTIC_CLIPDIST:
- case TGSI_SEMANTIC_PSIZE:
- break;
- default:
- assert(!"Unexpected semantic found while building TES input map");
- }
- if (semantic_name == TGSI_SEMANTIC_PATCH) {
- STORE(C(tcs_slot), patchAttribMap, {0, slot});
- } else {
- STORE(C(tcs_slot), vtxAttribMap, {0, slot});
- }
- mapConstants.push_back(C(tcs_slot));
- }
-
- // Build execution mask
- struct lp_build_mask_context mask;
- Value *mask_val = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_mask}, "tesMask");
-
- if (verbose_shader)
- lp_build_print_value(gallivm, "TES execution mask: ", wrap(mask_val));
-
- lp_build_mask_begin(&mask, gallivm,
- lp_type_float_vec(32, 32 * 8), wrap(mask_val));
- // Interface struct passed to lp_build_tgsi_soa through params.tes_iface
- struct swr_tes_llvm_iface tes_iface;
-
- tes_iface.base.fetch_vertex_input = ::swr_tes_llvm_fetch_vtx_input;
- tes_iface.base.fetch_patch_input = ::swr_tes_llvm_fetch_patch_input;
-
- tes_iface.pBuilder = this;
- tes_iface.pTesCtx = pTesCtx;
- tes_iface.pTsState = pTS;
- tes_iface.num_outputs = tes->info.base.num_outputs;
- tes_iface.info = info;
- tes_iface.pVtxAttribMap = vtxAttribMap;
- tes_iface.pPatchAttribMap = patchAttribMap;
-
- struct lp_build_tgsi_params params;
- memset(&params, 0, sizeof(params));
- params.type = lp_type_float_vec(32, 32 * 8);
- params.mask = & mask;
- params.consts_ptr = wrap(consts_ptr);
- params.const_sizes_ptr = wrap(const_sizes_ptr);
- params.system_values = &system_values;
- params.inputs = inputs;
- params.context_ptr = wrap(hPrivateData);
- params.sampler = sampler;
- params.info = &tes->info.base;
- params.tes_iface = &tes_iface.base;
-
- // Build LLVM IR
- lp_build_tgsi_soa(gallivm,
- tes->pipe.tokens,
- &params,
- outputs);
-
- lp_build_mask_end(&mask);
-
- sampler->destroy(sampler);
- // Continue emitting at the block gallivm ended on
- IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
- // Write output attributes
- Value *dclOut = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pOutputData}, "dclOut");
-
- for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
- for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
- if (!outputs[attrib][channel]) // outputs[] was zeroed above, so NULL means this channel was not written
- continue;
-
- Value *val = LOAD(unwrap(outputs[attrib][channel]));;
- Value *attribOffset =
- LOAD(pTesCtx, {0, SWR_DS_CONTEXT_outVertexAttribOffset});
-
- // Assume we write position
- Value* outputSlot = C(VERTEX_POSITION_SLOT);
- if (tes->info.base.output_semantic_name[attrib] != TGSI_SEMANTIC_POSITION) {
- // No, it's a generic attribute, not a position - let's calculate output slot
- uint32_t outSlot = attrib;
- if (tes->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
- // this shader will write position, so in shader's term
- // output starts at attrib 1, but we will handle that separately,
- // so let's fix the outSlot
- outSlot--;
- }
- outputSlot = ADD(attribOffset, C(outSlot));
- }
- // final index = vecStride * (outputSlot * 4 + channel) + vecOffset
- Value *attribVecIndex =
- ADD(MUL(vecStride, MUL(outputSlot, C(4))), vecOffset);
-
- uint32_t outputComponent = 0;
- uint32_t curComp = outputComponent + channel;
- auto outValIndex = ADD(attribVecIndex, MUL(vecStride, C(curComp)));
- STOREV(val, dclOut, {outValIndex});
-
- if (verbose_shader) {
- lp_build_printf(gallivm,
- "TES output [%d][%d]",
- C(attrib),
- C(channel));
- lp_build_print_value(gallivm, " = ", wrap(val));
- }
- }
- }
-
- RET_VOID();
- // Dump, verify and compile the module, then fetch the JIT-compiled entry point
- JM()->DumpToFile(pFunction, "src");
- gallivm_verify_function(gallivm, wrap(pFunction));
-
- gallivm_compile_module(gallivm);
- JM()->DumpToFile(pFunction, "optimized");
-
- PFN_TES_FUNC pFunc =
- (PFN_TES_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
-
- debug_printf("tess evaluation shader %p\n", pFunc);
- assert(pFunc && "Error: TessEvaluationShader = NULL");
-
- JM()->DumpAsm(pFunction, "asm");
-
- JM()->mIsModuleFinalized = true;
-
- return pFunc;
-}
-/**
- * Build and JIT-compile the tessellation control shader bound in ctx->tcs.
- *
- * Updates the hull-shader related fields of ctx->tsState, creates an LLVM
- * function named "TCS" taking pointers to the draw context, worker data and
- * the HS context, builds the input and output attribute slot maps, and
- * translates the TGSI tokens with lp_build_tgsi_soa.  The module is then
- * verified and compiled, and the JIT manager's module is marked as
- * finalized.
- *
- * \param ctx  SWR context; ctx->tcs and ctx->vs are read, ctx->tsState is updated
- * \param key  shader variant key; key.sampler is used to create the sampler
- * \return pointer to the JIT-compiled function (asserted to be non-NULL)
- */
-PFN_TCS_FUNC
-BuilderSWR::CompileTCS(struct swr_context *ctx, swr_jit_tcs_key &key)
-{
- SWR_TS_STATE *pTS = &ctx->tsState;
- struct tgsi_shader_info *info = &ctx->tcs->info.base;
-
- pTS->numHsInputAttribs = info->num_inputs;
- pTS->numHsOutputAttribs = info->num_outputs;
-
- pTS->hsAllocationSize = sizeof(ScalarPatch);
-
- pTS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
- pTS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
-
- struct swr_tess_control_shader *tcs = ctx->tcs;
-
- LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
- LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
-
- memset(outputs, 0, sizeof(outputs));
-
- AttrBuilder attrBuilder;
- attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
-
- std::vector<Type *> tcsArgs{
- PointerType::get(Gen_swr_draw_context(JM()), 0),
- PointerType::get(mInt8Ty, 0),
- PointerType::get(Gen_SWR_HS_CONTEXT(JM()), 0)};
- FunctionType *tcsFuncType =
- FunctionType::get(Type::getVoidTy(JM()->mContext), tcsArgs, false);
-
- // create the LLVM function that will hold the tessellation control shader
- auto pFunction = Function::Create(tcsFuncType,
- GlobalValue::ExternalLinkage,
- "TCS",
- JM()->mpCurrentModule);
-
-#if LLVM_VERSION_MAJOR < 5
- AttributeSet attrSet = AttributeSet::get(
- JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
- pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
-#else
- pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
-#endif
-
- BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
- IRB()->SetInsertPoint(block);
- LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
- // Name the three function arguments in declaration order
- auto argitr = pFunction->arg_begin();
- Value *hPrivateData = &*argitr++;
- hPrivateData->setName("hPrivateData");
- Value *pWorkerData = &*argitr++;
- pWorkerData->setName("pWorkerData");
- Value *pTcsCtx = &*argitr++;
- pTcsCtx->setName("tcsCtx");
-
- Value *consts_ptr =
- GEP(hPrivateData, {C(0), C(swr_draw_context_constantTCS)});
- consts_ptr->setName("tcs_constants");
- Value *const_sizes_ptr =
- GEP(hPrivateData, {0, swr_draw_context_num_constantsTCS});
- const_sizes_ptr->setName("num_tcs_constants");
-
- struct lp_build_sampler_soa *sampler =
- swr_sampler_soa_create(key.sampler, PIPE_SHADER_TESS_CTRL);
- assert(sampler != nullptr);
-
- struct lp_bld_tgsi_system_values system_values;
- memset(&system_values, 0, sizeof(system_values));
-
- system_values.prim_id =
- wrap(LOAD(pTcsCtx, {0, SWR_HS_CONTEXT_PrimitiveID}));
- // invocation_id starts as a broadcast 0; vertices_in is the patch size recorded on the shader
- system_values.invocation_id = wrap(VBROADCAST(C(0)));
- system_values.vertices_in = wrap(C(tcs->vertices_per_patch));
-
- if (verbose_shader) {
- lp_build_print_value(gallivm, "TCS::prim_id = ", system_values.prim_id);
- lp_build_print_value(gallivm, "TCS::invocation_id = ", system_values.invocation_id);
- lp_build_print_value(gallivm, "TCS::vertices_in = ", system_values.vertices_in);
- }
- // Map each TCS input slot to a vertex slot located via the VS outputs
- std::vector<Constant *> mapConstants; // NOTE(review): only appended to in this function, never read
- Value *vtxAttribMap =
- ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
-
- for (unsigned slot = 0; slot < info->num_inputs; slot++) {
- ubyte semantic_name = info->input_semantic_name[slot];
- ubyte semantic_idx = info->input_semantic_index[slot];
-
- unsigned vs_slot =
- locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
- assert(vs_slot < PIPE_MAX_SHADER_OUTPUTS);
-
- vs_slot += VERTEX_ATTRIB_START_SLOT;
- // when the VS's first output is POSITION, the slot is shifted down by one
- if (ctx->vs->info.base.output_semantic_name[0]
- == TGSI_SEMANTIC_POSITION)
- vs_slot--;
- // a POSITION input always maps to VERTEX_POSITION_SLOT
- if (semantic_name == TGSI_SEMANTIC_POSITION)
- vs_slot = VERTEX_POSITION_SLOT;
-
- STORE(C(vs_slot), vtxAttribMap, {0, slot});
- mapConstants.push_back(C(vs_slot));
- }
-
- // Prepare map of output attributes. Needed when shader instance wants
- // to read own output or output of other instance, which is allowed in TCS
- // NOTE(review): both output maps are sized with PIPE_MAX_SHADER_INPUTS but
- // indexed by output slot - confirm the input and output limits match.
- Value *vtxOutputAttribMap =
- ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
- // Map for per-patch attributes
- Value *patchOutputAttribMap =
- ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
- for (unsigned slot = 0; slot < info->num_outputs; slot++) {
- ubyte name = info->output_semantic_name[slot];
- int32_t idx = info->output_semantic_index[slot];
- if (name == TGSI_SEMANTIC_PATCH) {
- STORE(C(idx), patchOutputAttribMap, {0, slot});
- } else {
- int32_t target_slot = slot;
- if (name == TGSI_SEMANTIC_GENERIC) {
- target_slot += VERTEX_ATTRIB_START_SLOT;
- }
- // Now normalize target slot: subtract one for every earlier output
- // that is a tess level, per-patch or position output
- for (ubyte as = 0; as < slot; as++) {
- ubyte name = info->output_semantic_name[as]; // shadows the outer 'name' inside this loop
- switch (name) {
- case TGSI_SEMANTIC_TESSOUTER:
- case TGSI_SEMANTIC_TESSINNER:
- case TGSI_SEMANTIC_PATCH:
- case TGSI_SEMANTIC_POSITION:
- target_slot--;
- }
- }
- if (name == TGSI_SEMANTIC_POSITION) {
- target_slot = VERTEX_POSITION_SLOT;
- }
- STORE(C(target_slot), vtxOutputAttribMap, {0, slot});
- mapConstants.push_back(C(target_slot));
- }
- }
- // The initial execution mask is loaded from the HS context
- struct lp_build_mask_context mask;
- Value *mask_val = LOAD(pTcsCtx, {0, SWR_HS_CONTEXT_mask}, "tcsMask");
- lp_build_mask_begin(
- &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val));
- // Interface struct passed to lp_build_tgsi_soa through params.tcs_iface
- struct swr_tcs_llvm_iface tcs_iface;
-
- tcs_iface.base.emit_store_output = ::swr_tcs_llvm_store_output;
- tcs_iface.base.emit_fetch_input = ::swr_tcs_llvm_fetch_input;
- tcs_iface.base.emit_fetch_output = ::swr_tcs_llvm_fetch_output;
- tcs_iface.base.emit_barrier = ::swr_tcs_llvm_emit_barrier;
- tcs_iface.base.emit_prologue = ::swr_tcs_llvm_emit_prologue;
- tcs_iface.base.emit_epilogue = ::swr_tcs_llvm_emit_epilogue;
-
- tcs_iface.pBuilder = this;
- tcs_iface.pTcsCtx = pTcsCtx;
- tcs_iface.pTsState = pTS;
- tcs_iface.output_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
- tcs_iface.info = info;
- tcs_iface.pVtxAttribMap = vtxAttribMap;
- tcs_iface.pVtxOutputAttribMap = vtxOutputAttribMap;
- tcs_iface.pPatchOutputAttribMap = patchOutputAttribMap;
-
- struct lp_build_tgsi_params params;
- memset(&params, 0, sizeof(params));
- params.type = lp_type_float_vec(32, 32 * 8);
- params.mask = &mask;
- params.consts_ptr = wrap(consts_ptr);
- params.const_sizes_ptr = wrap(const_sizes_ptr);
- params.system_values = &system_values;
- params.inputs = inputs;
- params.context_ptr = wrap(hPrivateData);
- params.sampler = sampler;
- params.info = &tcs->info.base;
- params.tcs_iface = &tcs_iface.base;
- // Translate the TGSI tokens into LLVM IR
- lp_build_tgsi_soa(gallivm, tcs->pipe.tokens, &params, outputs);
-
- lp_build_mask_end(&mask);
-
- sampler->destroy(sampler);
- // Continue emitting at the block gallivm ended on
- IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
- RET_VOID();
- // Dump, verify and compile the module, then fetch the JIT-compiled entry point
- JM()->DumpToFile(pFunction, "src");
- gallivm_verify_function(gallivm, wrap(pFunction));
- gallivm_compile_module(gallivm);
- JM()->DumpToFile(pFunction, "optimized");
-
- PFN_TCS_FUNC pFunc =
- (PFN_TCS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
-
- debug_printf("tess control shader %p\n", pFunc);
- assert(pFunc && "Error: TessControlShader = NULL");
- JM()->DumpAsm(pFunction, "asm");
-
- JM()->mIsModuleFinalized = true;
-
- return pFunc;
-}
-
-
-/**
- * Compile the bound geometry shader for the given variant key and record
- * the result in ctx->gs->map.
- *
- * \param ctx  SWR context owning the geometry shader
- * \param key  variant key the compiled function is stored under
- * \return the function pointer produced by BuilderSWR::CompileGS
- */
-PFN_GS_FUNC
-swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key)
-{
-   JitManager *jitMgr =
-      reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr);
-   BuilderSWR builder(jitMgr, "GS");
-
-   PFN_GS_FUNC func = builder.CompileGS(ctx, key);
-
-   // The variant keeps the builder's gallivm state together with the entry point.
-   std::unique_ptr<VariantGS> variant(new VariantGS(builder.gallivm, func));
-   ctx->gs->map.insert(std::make_pair(key, std::move(variant)));
-
-   return func;
-}
-
-/**
- * Compile the bound tessellation control shader for the given variant key
- * and record the result in ctx->tcs->map.
- *
- * \param ctx  SWR context owning the tessellation control shader
- * \param key  variant key the compiled function is stored under
- * \return the function pointer produced by BuilderSWR::CompileTCS
- */
-PFN_TCS_FUNC
-swr_compile_tcs(struct swr_context *ctx, swr_jit_tcs_key &key)
-{
-   JitManager *jitMgr =
-      reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr);
-   BuilderSWR builder(jitMgr, "TCS");
-
-   PFN_TCS_FUNC func = builder.CompileTCS(ctx, key);
-
-   // The variant keeps the builder's gallivm state together with the entry point.
-   std::unique_ptr<VariantTCS> variant(new VariantTCS(builder.gallivm, func));
-   ctx->tcs->map.insert(std::make_pair(key, std::move(variant)));
-
-   return func;
-}
-
-PFN_TES_FUNC
-swr_compile_tes(struct swr_context *ctx, swr_jit_tes_key &key)
-{
- BuilderSWR builder(
- reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
- "TES");
- PFN_TES_FUNC func = builder.CompileTES(ctx, key);
-
- ctx->tes->map.insert(
- std::make_pair(key, std::unique_ptr<VariantTES>(new VariantTES(builder.gallivm, func))));
-
- return func;
-}
-
-void
-BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel)
-{
-#if USE_SIMD16_FRONTEND && !USE_SIMD16_VS
- // interleave the simdvertex components into the dest simd16vertex
- // slot16offset = slot8offset * 2
- // comp16offset = comp8offset * 2 + alternateOffset
-
- Value *offset = LOAD(pVsContext, { 0, SWR_VS_CONTEXT_AlternateOffset });
- Value *pOut = GEP(pVtxOutput, { C(0), C(0), C(slot * 2), offset } );
- STORE(pVal, pOut, {channel * 2});
-#else
- Value *pOut = GEP(pVtxOutput, {0, 0, slot});
- STORE(pVal, pOut, {0, channel});
- if (verbose_vs_shader) {
- lp_build_printf(gallivm, "VS: Storing on slot %d, channel %d: ", C(slot), C(channel));
- lp_build_print_value(gallivm, "", wrap(pVal));
- }
-#endif
-}
-
-PFN_VERTEX_FUNC
-BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
-{
- struct swr_vertex_shader *swr_vs = ctx->vs;
-
- LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
- LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
-
- memset(outputs, 0, sizeof(outputs));
-
- AttrBuilder attrBuilder;
- attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
-
- std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
- PointerType::get(mInt8Ty, 0),
- PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
- FunctionType *vsFuncType =
- FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
-
- // create new vertex shader function
- auto pFunction = Function::Create(vsFuncType,
- GlobalValue::ExternalLinkage,
- "VS",
- JM()->mpCurrentModule);
-#if LLVM_VERSION_MAJOR < 5
- AttributeSet attrSet = AttributeSet::get(
- JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
- pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
-#else
- pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
-#endif
-
- BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
- IRB()->SetInsertPoint(block);
- LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
-
- auto argitr = pFunction->arg_begin();
- Value *hPrivateData = &*argitr++;
- hPrivateData->setName("hPrivateData");
- Value *pWorkerData = &*argitr++;
- pWorkerData->setName("pWorkerData");
- Value *pVsCtx = &*argitr++;
- pVsCtx->setName("vsCtx");
-
- Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)});
-
- consts_ptr->setName("vs_constants");
- Value *const_sizes_ptr =
- GEP(hPrivateData, {0, swr_draw_context_num_constantsVS});
- const_sizes_ptr->setName("num_vs_constants");
-
- Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
-#if USE_SIMD16_VS
- vtxInput = BITCAST(vtxInput, PointerType::get(Gen_simd16vertex(JM()), 0));
-#endif
-
- for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
- const unsigned mask = swr_vs->info.base.input_usage_mask[attrib];
- for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
- if (mask & (1 << channel)) {
- inputs[attrib][channel] =
- wrap(LOAD(vtxInput, {0, 0, attrib, channel}));
- }
- }
- }
-
- struct lp_build_sampler_soa *sampler =
- swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX);
- assert(sampler != nullptr);
-
- struct lp_bld_tgsi_system_values system_values;
- memset(&system_values, 0, sizeof(system_values));
- system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
-
-#if USE_SIMD16_VS
- system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID16}));
-#else
- system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
-#endif
-
-#if USE_SIMD16_VS
- uint32_t vectorWidth = mVWidth16;
-#else
- uint32_t vectorWidth = mVWidth;
-#endif
-
- struct lp_build_tgsi_params params;
- memset(&params, 0, sizeof(params));
- params.type = lp_type_float_vec(32, 32 * vectorWidth);
- params.consts_ptr = wrap(consts_ptr);
- params.const_sizes_ptr = wrap(const_sizes_ptr);
- params.system_values = &system_values;
- params.inputs = inputs;
- params.context_ptr = wrap(hPrivateData);
- params.sampler = sampler;
- params.info = &swr_vs->info.base;
-
- lp_build_tgsi_soa(gallivm,
- swr_vs->pipe.tokens,
- &params,
- outputs);
-
- sampler->destroy(sampler);
-
- IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
- Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
-#if USE_SIMD16_VS
- vtxOutput = BITCAST(vtxOutput, PointerType::get(Gen_simd16vertex(JM()), 0));
-#endif
-
- for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
- for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
- if (!outputs[attrib][channel])
- continue;
-
- Value *val;
- uint32_t outSlot;
-
- if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
- if (channel != VERTEX_SGV_POINT_SIZE_COMP)
- continue;
- val = LOAD(unwrap(outputs[attrib][0]));
- outSlot = VERTEX_SGV_SLOT;
- } else if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
- val = LOAD(unwrap(outputs[attrib][channel]));
- outSlot = VERTEX_POSITION_SLOT;
- } else {
- val = LOAD(unwrap(outputs[attrib][channel]));
- outSlot = VERTEX_ATTRIB_START_SLOT + attrib;
- if (swr_vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
- outSlot--;
- }
-
- WriteVS(val, pVsCtx, vtxOutput, outSlot, channel);
- }
- }
-
- if (ctx->rasterizer->clip_plane_enable ||
- swr_vs->info.base.culldist_writemask) {
- unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
-
- unsigned cv = 0;
- if (swr_vs->info.base.writes_clipvertex) {
- cv = locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
- &swr_vs->info.base);
- } else {
- for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
- if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
- swr_vs->info.base.output_semantic_index[i] == 0) {
- cv = i;
- break;
- }
- }
- }
- assert(cv < PIPE_MAX_SHADER_OUTPUTS);
- LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
- LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
- LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
- LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
-
- tgsi_shader_info *pLastFE = &ctx->vs->info.base;
-
- if (ctx->gs) {
- pLastFE = &ctx->gs->info.base;
- }
- else if (ctx->tes) {
- pLastFE = &ctx->tes->info.base;
- }
- else if (ctx->tcs) {
- pLastFE = &ctx->tcs->info.base;
- }
-
- for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
- // clip distance overrides user clip planes
- if ((pLastFE->clipdist_writemask & clip_mask & (1 << val)) ||
- ((pLastFE->culldist_writemask << pLastFE->num_written_clipdistance) & (1 << val))) {
- unsigned cv = locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1, pLastFE);
- assert(cv < PIPE_MAX_SHADER_OUTPUTS);
- if (val < 4) {
- LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], "");
- WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
- } else {
- LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], "");
- WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
- }
- continue;
- }
-
- if (!(clip_mask & (1 << val)))
- continue;
-
- Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0}));
- Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
- Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
- Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
-#if USE_SIMD16_VS
- Value *bpx = VBROADCAST_16(px);
- Value *bpy = VBROADCAST_16(py);
- Value *bpz = VBROADCAST_16(pz);
- Value *bpw = VBROADCAST_16(pw);
-#else
- Value *bpx = VBROADCAST(px);
- Value *bpy = VBROADCAST(py);
- Value *bpz = VBROADCAST(pz);
- Value *bpw = VBROADCAST(pw);
-#endif
- Value *dist = FADD(FMUL(unwrap(cx), bpx),
- FADD(FMUL(unwrap(cy), bpy),
- FADD(FMUL(unwrap(cz), bpz),
- FMUL(unwrap(cw), bpw))));
-
- if (val < 4)
- WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
- else
- WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
- }
- }
-
- RET_VOID();
-
- JM()->DumpToFile(pFunction, "vs_function1");
- gallivm_verify_function(gallivm, wrap(pFunction));
- gallivm_compile_module(gallivm);
- JM()->DumpToFile(pFunction, "vs_function2");
-
- // lp_debug_dump_value(func);
-
- PFN_VERTEX_FUNC pFunc =
- (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
-
- JM()->DumpAsm(pFunction, "vs_function_asm");
- debug_printf("vert shader %p\n", pFunc);
- assert(pFunc && "Error: VertShader = NULL");
-
- JM()->mIsModuleFinalized = true;
-
- return pFunc;
-}
-
-PFN_VERTEX_FUNC
-swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
-{
- if (!ctx->vs->pipe.tokens)
- return NULL;
-
- BuilderSWR builder(
- reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
- "VS");
- PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key);
-
- ctx->vs->map.insert(std::make_pair(key, std::unique_ptr<VariantVS>(new VariantVS(builder.gallivm, func))));
- return func;
-}
-
-unsigned
-swr_so_adjust_attrib(unsigned in_attrib,
- swr_vertex_shader *swr_vs)
-{
- ubyte semantic_name;
- unsigned attrib;
-
- attrib = in_attrib + VERTEX_ATTRIB_START_SLOT;
-
- if (swr_vs) {
- semantic_name = swr_vs->info.base.output_semantic_name[in_attrib];
- if (semantic_name == TGSI_SEMANTIC_POSITION) {
- attrib = VERTEX_POSITION_SLOT;
- } else if (semantic_name == TGSI_SEMANTIC_PSIZE) {
- attrib = VERTEX_SGV_SLOT;
- } else if (semantic_name == TGSI_SEMANTIC_LAYER) {
- attrib = VERTEX_SGV_SLOT;
- } else {
- if (swr_vs->info.base.writes_position) {
- attrib--;
- }
- }
- }
-
- return attrib;
-}
-
-static unsigned
-locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
-{
- for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
- if ((info->output_semantic_name[i] == name)
- && (info->output_semantic_index[i] == index)) {
- return i;
- }
- }
-
- return 0xFFFFFFFF;
-}
-
-PFN_PIXEL_KERNEL
-BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
-{
- struct swr_fragment_shader *swr_fs = ctx->fs;
-
- struct tgsi_shader_info *pPrevShader;
- if (ctx->gs)
- pPrevShader = &ctx->gs->info.base;
- else if (ctx->tes)
- pPrevShader = &ctx->tes->info.base;
- else
- pPrevShader = &ctx->vs->info.base;
-
- LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
- LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
-
- memset(inputs, 0, sizeof(inputs));
- memset(outputs, 0, sizeof(outputs));
-
- struct lp_build_sampler_soa *sampler = NULL;
-
- AttrBuilder attrBuilder;
- attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
-
- std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
- PointerType::get(mInt8Ty, 0),
- PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
- FunctionType *funcType =
- FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
-
- auto pFunction = Function::Create(funcType,
- GlobalValue::ExternalLinkage,
- "FS",
- JM()->mpCurrentModule);
-#if LLVM_VERSION_MAJOR < 5
- AttributeSet attrSet = AttributeSet::get(
- JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
- pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
-#else
- pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
-#endif
-
- BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
- IRB()->SetInsertPoint(block);
- LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
-
- auto args = pFunction->arg_begin();
- Value *hPrivateData = &*args++;
- hPrivateData->setName("hPrivateData");
- Value *pWorkerData = &*args++;
- pWorkerData->setName("pWorkerData");
- Value *pPS = &*args++;
- pPS->setName("psCtx");
-
- Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS});
- consts_ptr->setName("fs_constants");
- Value *const_sizes_ptr =
- GEP(hPrivateData, {0, swr_draw_context_num_constantsFS});
- const_sizes_ptr->setName("num_fs_constants");
-
- // load *pAttribs, *pPerspAttribs
- Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs");
- Value *pPerspAttribs =
- LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs");
-
- swr_fs->constantMask = 0;
- swr_fs->flatConstantMask = 0;
- swr_fs->pointSpriteMask = 0;
-
- for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
- const unsigned mask = swr_fs->info.base.input_usage_mask[attrib];
- const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib];
- const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib];
-
- if (!mask)
- continue;
-
- // load i,j
- Value *vi = nullptr, *vj = nullptr;
- switch (interpLoc) {
- case TGSI_INTERPOLATE_LOC_CENTER:
- vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i");
- vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j");
- break;
- case TGSI_INTERPOLATE_LOC_CENTROID:
- vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i");
- vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j");
- break;
- case TGSI_INTERPOLATE_LOC_SAMPLE:
- vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i");
- vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j");
- break;
- }
-
- // load/compute w
- Value *vw = nullptr, *pAttribs;
- if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
- interpMode == TGSI_INTERPOLATE_COLOR) {
- pAttribs = pPerspAttribs;
- switch (interpLoc) {
- case TGSI_INTERPOLATE_LOC_CENTER:
- vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}));
- break;
- case TGSI_INTERPOLATE_LOC_CENTROID:
- vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid}));
- break;
- case TGSI_INTERPOLATE_LOC_SAMPLE:
- vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample}));
- break;
- }
- } else {
- pAttribs = pRawAttribs;
- vw = VIMMED1(1.f);
- }
-
- vw->setName("w");
-
- ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib];
- ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib];
-
- if (semantic_name == TGSI_SEMANTIC_FACE) {
- Value *ff =
- UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty);
- ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f));
- ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace");
-
- inputs[attrib][0] = wrap(ff);
- inputs[attrib][1] = wrap(VIMMED1(0.0f));
- inputs[attrib][2] = wrap(VIMMED1(0.0f));
- inputs[attrib][3] = wrap(VIMMED1(1.0f));
- continue;
- } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord
- if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
- TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER) {
- inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX"));
- inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY"));
- } else {
- inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}, "vX"));
- inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}, "vY"));
- }
- inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ"));
- inputs[attrib][3] =
- wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW"));
- continue;
- } else if (semantic_name == TGSI_SEMANTIC_LAYER) { // gl_Layer
- Value *ff = LOAD(pPS, {0, SWR_PS_CONTEXT_renderTargetArrayIndex});
- ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vRenderTargetArrayIndex");
- inputs[attrib][0] = wrap(ff);
- inputs[attrib][1] = wrap(VIMMED1(0.0f));
- inputs[attrib][2] = wrap(VIMMED1(0.0f));
- inputs[attrib][3] = wrap(VIMMED1(0.0f));
- continue;
- } else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) { // gl_ViewportIndex
- Value *ff = LOAD(pPS, {0, SWR_PS_CONTEXT_viewportIndex});
- ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vViewportIndex");
- inputs[attrib][0] = wrap(ff);
- inputs[attrib][1] = wrap(VIMMED1(0.0f));
- inputs[attrib][2] = wrap(VIMMED1(0.0f));
- inputs[attrib][3] = wrap(VIMMED1(0.0f));
- continue;
- }
- unsigned linkedAttrib =
- locate_linkage(semantic_name, semantic_idx, pPrevShader) - 1;
-
- uint32_t extraAttribs = 0;
- if (semantic_name == TGSI_SEMANTIC_PRIMID && !ctx->gs) {
- /* non-gs generated primID - need to grab from swizzleMap override */
- linkedAttrib = pPrevShader->num_outputs - 1;
- swr_fs->constantMask |= 1 << linkedAttrib;
- extraAttribs++;
- } else if (semantic_name == TGSI_SEMANTIC_GENERIC &&
- key.sprite_coord_enable & (1 << semantic_idx)) {
- /* we add an extra attrib to the backendState in swr_update_derived. */
- linkedAttrib = pPrevShader->num_outputs + extraAttribs - 1;
- swr_fs->pointSpriteMask |= (1 << linkedAttrib);
- extraAttribs++;
- } else if (linkedAttrib + 1 == 0xFFFFFFFF) {
- inputs[attrib][0] = wrap(VIMMED1(0.0f));
- inputs[attrib][1] = wrap(VIMMED1(0.0f));
- inputs[attrib][2] = wrap(VIMMED1(0.0f));
- inputs[attrib][3] = wrap(VIMMED1(1.0f));
- /* If we're reading in color and 2-sided lighting is enabled, we have
- * to keep going.
- */
- if (semantic_name != TGSI_SEMANTIC_COLOR || !key.light_twoside)
- continue;
- } else {
- if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
- swr_fs->constantMask |= 1 << linkedAttrib;
- } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
- swr_fs->flatConstantMask |= 1 << linkedAttrib;
- }
- }
-
- unsigned bcolorAttrib = 0xFFFFFFFF;
- Value *offset = NULL;
- if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) {
- bcolorAttrib = locate_linkage(
- TGSI_SEMANTIC_BCOLOR, semantic_idx, pPrevShader);
- /* Neither front nor back colors were available. Nothing to load. */
- if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF)
- continue;
- /* If there is no front color, just always use the back color. */
- if (linkedAttrib + 1 == 0xFFFFFFFF)
- linkedAttrib = bcolorAttrib;
-
- if (bcolorAttrib != 0xFFFFFFFF) {
- bcolorAttrib -= 1;
- if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
- swr_fs->constantMask |= 1 << bcolorAttrib;
- } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
- swr_fs->flatConstantMask |= 1 << bcolorAttrib;
- }
-
- unsigned diff = 12 * (bcolorAttrib - linkedAttrib);
-
- if (diff) {
- Value *back =
- XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace");
-
- offset = MUL(back, C(diff));
- offset->setName("offset");
- }
- }
- }
-
- for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
- if (mask & (1 << channel)) {
- Value *indexA = C(linkedAttrib * 12 + channel);
- Value *indexB = C(linkedAttrib * 12 + channel + 4);
- Value *indexC = C(linkedAttrib * 12 + channel + 8);
-
- if (offset) {
- indexA = ADD(indexA, offset);
- indexB = ADD(indexB, offset);
- indexC = ADD(indexC, offset);
- }
-
- Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA)));
- Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB)));
- Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC)));
-
- if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
- inputs[attrib][channel] = wrap(va);
- } else {
- Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj);
-
- vc = FMUL(vk, vc);
-
- Value *interp = FMUL(va, vi);
- Value *interp1 = FMUL(vb, vj);
- interp = FADD(interp, interp1);
- interp = FADD(interp, vc);
- if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
- interpMode == TGSI_INTERPOLATE_COLOR)
- interp = FMUL(interp, vw);
- inputs[attrib][channel] = wrap(interp);
- }
- }
- }
- }
-
- sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT);
- assert(sampler != nullptr);
-
- struct lp_bld_tgsi_system_values system_values;
- memset(&system_values, 0, sizeof(system_values));
-
- struct lp_build_mask_context mask;
- bool uses_mask = false;
-
- if (swr_fs->info.base.uses_kill ||
- key.poly_stipple_enable) {
- Value *vActiveMask = NULL;
- if (swr_fs->info.base.uses_kill) {
- vActiveMask = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask");
- }
- if (key.poly_stipple_enable) {
- // first get fragment xy coords and clip to stipple bounds
- Value *vXf = LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL});
- Value *vYf = LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL});
- Value *vXu = FP_TO_UI(vXf, mSimdInt32Ty);
- Value *vYu = FP_TO_UI(vYf, mSimdInt32Ty);
-
- // stipple pattern is 32x32, which means that one line of stipple
- // is stored in one word:
- // vXstipple is bit offset inside 32-bit stipple word
- // vYstipple is word index is stipple array
- Value *vXstipple = AND(vXu, VIMMED1(0x1f)); // & (32-1)
- Value *vYstipple = AND(vYu, VIMMED1(0x1f)); // & (32-1)
-
- // grab stipple pattern base address
- Value *stipplePtr = GEP(hPrivateData, {0, swr_draw_context_polyStipple, 0});
- stipplePtr = BITCAST(stipplePtr, mInt8PtrTy);
-
- // peform a gather to grab stipple words for each lane
- Value *vStipple = GATHERDD(VUNDEF_I(), stipplePtr, vYstipple,
- VIMMED1(0xffffffff), 4);
-
- // create a mask with one bit corresponding to the x stipple
- // and AND it with the pattern, to see if we have a bit
- Value *vBitMask = LSHR(VIMMED1(0x80000000), vXstipple);
- Value *vStippleMask = AND(vStipple, vBitMask);
- vStippleMask = ICMP_NE(vStippleMask, VIMMED1(0));
- vStippleMask = VMASK(vStippleMask);
-
- if (swr_fs->info.base.uses_kill) {
- vActiveMask = AND(vActiveMask, vStippleMask);
- } else {
- vActiveMask = vStippleMask;
- }
- }
- lp_build_mask_begin(
- &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask));
- uses_mask = true;
- }
-
- struct lp_build_tgsi_params params;
- memset(&params, 0, sizeof(params));
- params.type = lp_type_float_vec(32, 32 * 8);
- params.mask = uses_mask ? &mask : NULL;
- params.consts_ptr = wrap(consts_ptr);
- params.const_sizes_ptr = wrap(const_sizes_ptr);
- params.system_values = &system_values;
- params.inputs = inputs;
- params.context_ptr = wrap(hPrivateData);
- params.sampler = sampler;
- params.info = &swr_fs->info.base;
-
- lp_build_tgsi_soa(gallivm,
- swr_fs->pipe.tokens,
- &params,
- outputs);
-
- sampler->destroy(sampler);
-
- IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
- for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs;
- attrib++) {
- switch (swr_fs->info.base.output_semantic_name[attrib]) {
- case TGSI_SEMANTIC_POSITION: {
- // write z
- LLVMValueRef outZ =
- LLVMBuildLoad(gallivm->builder, outputs[attrib][2], "");
- STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ});
- break;
- }
- case TGSI_SEMANTIC_COLOR: {
- for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
- if (!outputs[attrib][channel])
- continue;
-
- LLVMValueRef out =
- LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], "");
- if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
- swr_fs->info.base.output_semantic_index[attrib] == 0) {
- for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) {
- STORE(unwrap(out),
- pPS,
- {0, SWR_PS_CONTEXT_shaded, rt, channel});
- }
- } else {
- STORE(unwrap(out),
- pPS,
- {0,
- SWR_PS_CONTEXT_shaded,
- swr_fs->info.base.output_semantic_index[attrib],
- channel});
- }
- }
- break;
- }
- default: {
- fprintf(stderr,
- "unknown output from FS %s[%d]\n",
- tgsi_semantic_names[swr_fs->info.base
- .output_semantic_name[attrib]],
- swr_fs->info.base.output_semantic_index[attrib]);
- break;
- }
- }
- }
-
- LLVMValueRef mask_result = 0;
- if (uses_mask) {
- mask_result = lp_build_mask_end(&mask);
- }
-
- IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
- if (uses_mask) {
- STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask});
- }
-
- RET_VOID();
-
- gallivm_verify_function(gallivm, wrap(pFunction));
-
- gallivm_compile_module(gallivm);
-
- // after the gallivm passes, we have to lower the core's intrinsics
- llvm::legacy::FunctionPassManager lowerPass(JM()->mpCurrentModule);
- lowerPass.add(createLowerX86Pass(this));
- lowerPass.run(*pFunction);
-
- PFN_PIXEL_KERNEL kernel =
- (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction));
- debug_printf("frag shader %p\n", kernel);
- assert(kernel && "Error: FragShader = NULL");
-
- JM()->mIsModuleFinalized = true;
-
- return kernel;
-}
-
-PFN_PIXEL_KERNEL
-swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key)
-{
- if (!ctx->fs->pipe.tokens)
- return NULL;
-
- BuilderSWR builder(
- reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
- "FS");
- PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key);
-
- ctx->fs->map.insert(std::make_pair(key, std::unique_ptr<VariantFS>(new VariantFS(builder.gallivm, func))));
- return func;
-}
diff --git a/src/gallium/drivers/swr/swr_shader.h b/src/gallium/drivers/swr/swr_shader.h
deleted file mode 100644
index cabe915f312..00000000000
--- a/src/gallium/drivers/swr/swr_shader.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#pragma once
-
-struct swr_vertex_shader;
-struct swr_fragment_shader;
-struct swr_geometry_shader;
-struct swr_tess_control_shader;
-struct swr_tess_evaluation_shader;
-
-struct swr_jit_fs_key;
-struct swr_jit_vs_key;
-struct swr_jit_gs_key;
-struct swr_jit_tcs_key;
-struct swr_jit_tes_key;
-
-using PFN_TCS_FUNC = PFN_HS_FUNC;
-using PFN_TES_FUNC = PFN_DS_FUNC;
-
-unsigned swr_so_adjust_attrib(unsigned in_attrib,
- swr_vertex_shader *swr_vs);
-
-PFN_VERTEX_FUNC
-swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key);
-
-PFN_PIXEL_KERNEL
-swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key);
-
-PFN_GS_FUNC
-swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key);
-
-PFN_TCS_FUNC
-swr_compile_tcs(struct swr_context *ctx, swr_jit_tcs_key &key);
-
-PFN_TES_FUNC
-swr_compile_tes(struct swr_context *ctx, swr_jit_tes_key &key);
-
-void swr_generate_fs_key(struct swr_jit_fs_key &key,
- struct swr_context *ctx,
- swr_fragment_shader *swr_fs);
-
-void swr_generate_vs_key(struct swr_jit_vs_key &key,
- struct swr_context *ctx,
- swr_vertex_shader *swr_vs);
-
-void swr_generate_fetch_key(struct swr_jit_fetch_key &key,
- struct swr_vertex_element_state *velems);
-
-void swr_generate_gs_key(struct swr_jit_gs_key &key,
- struct swr_context *ctx,
- swr_geometry_shader *swr_gs);
-
-void swr_generate_tcs_key(struct swr_jit_tcs_key &key,
- struct swr_context *ctx,
- swr_tess_control_shader *swr_tcs);
-
-void swr_generate_tes_key(struct swr_jit_tes_key &key,
- struct swr_context *ctx,
- swr_tess_evaluation_shader *swr_tes);
-
-struct swr_jit_sampler_key {
- unsigned nr_samplers;
- unsigned nr_sampler_views;
- struct swr_sampler_static_state sampler[PIPE_MAX_SHADER_SAMPLER_VIEWS];
-};
-
-struct swr_jit_fs_key : swr_jit_sampler_key {
- unsigned nr_cbufs;
- unsigned light_twoside;
- unsigned sprite_coord_enable;
- ubyte vs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
- ubyte vs_output_semantic_idx[PIPE_MAX_SHADER_OUTPUTS];
- bool poly_stipple_enable;
-};
-
-struct swr_jit_vs_key : swr_jit_sampler_key {
- unsigned clip_plane_mask; // from rasterizer state & vs_info
-};
-
-struct swr_jit_fetch_key {
- FETCH_COMPILE_STATE fsState;
-};
-
-struct swr_jit_gs_key : swr_jit_sampler_key {
- ubyte vs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
- ubyte vs_output_semantic_idx[PIPE_MAX_SHADER_OUTPUTS];
-};
-
-// TESS_TODO: revisit this - we probably need to use
-// primitive modes, number of vertices emitted, etc.
-struct swr_jit_tcs_key : swr_jit_sampler_key {
- ubyte vs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
- ubyte vs_output_semantic_idx[PIPE_MAX_SHADER_OUTPUTS];
- unsigned clip_plane_mask; // from rasterizer state & tcs_info
-};
-
-// TESS_TODO: revisit this
-struct swr_jit_tes_key : swr_jit_sampler_key {
- ubyte prev_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
- ubyte prev_output_semantic_idx[PIPE_MAX_SHADER_OUTPUTS];
- unsigned clip_plane_mask; // from rasterizer state & tes_info
-};
-
-namespace std
-{
-template <> struct hash<swr_jit_fs_key> {
- std::size_t operator()(const swr_jit_fs_key &k) const
- {
- return util_hash_crc32(&k, sizeof(k));
- }
-};
-
-template <> struct hash<swr_jit_vs_key> {
- std::size_t operator()(const swr_jit_vs_key &k) const
- {
- return util_hash_crc32(&k, sizeof(k));
- }
-};
-
-template <> struct hash<swr_jit_fetch_key> {
- std::size_t operator()(const swr_jit_fetch_key &k) const
- {
- return util_hash_crc32(&k, sizeof(k));
- }
-};
-
-template <> struct hash<swr_jit_gs_key> {
- std::size_t operator()(const swr_jit_gs_key &k) const
- {
- return util_hash_crc32(&k, sizeof(k));
- }
-};
-
-template <> struct hash<swr_jit_tcs_key> {
- std::size_t operator()(const swr_jit_tcs_key &k) const
- {
- return util_hash_crc32(&k, sizeof(k));
- }
-};
-
-template <> struct hash<swr_jit_tes_key> {
- std::size_t operator()(const swr_jit_tes_key &k) const
- {
- return util_hash_crc32(&k, sizeof(k));
- }
-};
-};
-
-bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs);
-bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs);
-bool operator==(const swr_jit_fetch_key &lhs, const swr_jit_fetch_key &rhs);
-bool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs);
-bool operator==(const swr_jit_tcs_key &lhs, const swr_jit_tcs_key &rhs);
-bool operator==(const swr_jit_tes_key &lhs, const swr_jit_tes_key &rhs);
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
deleted file mode 100644
index 5f1464e6d0e..00000000000
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ /dev/null
@@ -1,2243 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#include <llvm/Config/llvm-config.h>
-
-#if LLVM_VERSION_MAJOR < 7
-// llvm redefines DEBUG
-#pragma push_macro("DEBUG")
-#undef DEBUG
-#endif
-
-#include <rasterizer/core/state.h>
-#include "JitManager.h"
-
-#if LLVM_VERSION_MAJOR < 7
-#pragma pop_macro("DEBUG")
-#endif
-
-#include "common/os.h"
-#include "jit_api.h"
-#include "gen_state_llvm.h"
-#include "core/multisample.h"
-#include "core/state_funcs.h"
-
-#include "gallivm/lp_bld_tgsi.h"
-#include "util/format/u_format.h"
-
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "util/u_helpers.h"
-#include "util/u_framebuffer.h"
-#include "util/u_viewport.h"
-#include "util/u_prim.h"
-
-#include "swr_state.h"
-#include "swr_context.h"
-#include "gen_surf_state_llvm.h"
-#include "gen_swr_context_llvm.h"
-#include "swr_screen.h"
-#include "swr_resource.h"
-#include "swr_tex_sample.h"
-#include "swr_scratch.h"
-#include "swr_shader.h"
-#include "swr_fence.h"
-
-/* These should be pulled out into separate files as necessary
- * Just initializing everything here to get going. */
-
-static void *
-swr_create_blend_state(struct pipe_context *pipe,
- const struct pipe_blend_state *blend)
-{
- struct swr_blend_state *state = CALLOC_STRUCT(swr_blend_state);
- assert(state != nullptr);
-
- memcpy(&state->pipe, blend, sizeof(*blend));
-
- struct pipe_blend_state *pipe_blend = &state->pipe;
-
- for (int target = 0;
- target < std::min(SWR_NUM_RENDERTARGETS, PIPE_MAX_COLOR_BUFS);
- target++) {
-
- struct pipe_rt_blend_state *rt_blend = &pipe_blend->rt[target];
- SWR_RENDER_TARGET_BLEND_STATE &blendState =
- state->blendState.renderTarget[target];
- RENDER_TARGET_BLEND_COMPILE_STATE &compileState =
- state->compileState[target];
-
- if (target != 0 && !pipe_blend->independent_blend_enable) {
- memcpy(&compileState,
- &state->compileState[0],
- sizeof(RENDER_TARGET_BLEND_COMPILE_STATE));
- continue;
- }
-
- compileState.blendEnable = rt_blend->blend_enable;
- if (compileState.blendEnable) {
- compileState.sourceAlphaBlendFactor =
- swr_convert_blend_factor(rt_blend->alpha_src_factor);
- compileState.destAlphaBlendFactor =
- swr_convert_blend_factor(rt_blend->alpha_dst_factor);
- compileState.sourceBlendFactor =
- swr_convert_blend_factor(rt_blend->rgb_src_factor);
- compileState.destBlendFactor =
- swr_convert_blend_factor(rt_blend->rgb_dst_factor);
-
- compileState.colorBlendFunc =
- swr_convert_blend_func(rt_blend->rgb_func);
- compileState.alphaBlendFunc =
- swr_convert_blend_func(rt_blend->alpha_func);
- }
- compileState.logicOpEnable = state->pipe.logicop_enable;
- if (compileState.logicOpEnable) {
- compileState.logicOpFunc =
- swr_convert_logic_op(state->pipe.logicop_func);
- }
-
- blendState.writeDisableRed =
- (rt_blend->colormask & PIPE_MASK_R) ? 0 : 1;
- blendState.writeDisableGreen =
- (rt_blend->colormask & PIPE_MASK_G) ? 0 : 1;
- blendState.writeDisableBlue =
- (rt_blend->colormask & PIPE_MASK_B) ? 0 : 1;
- blendState.writeDisableAlpha =
- (rt_blend->colormask & PIPE_MASK_A) ? 0 : 1;
-
- if (rt_blend->colormask == 0)
- compileState.blendEnable = false;
- }
-
- return state;
-}
-
-static void
-swr_bind_blend_state(struct pipe_context *pipe, void *blend)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- if (ctx->blend == blend)
- return;
-
- ctx->blend = (swr_blend_state *)blend;
-
- ctx->dirty |= SWR_NEW_BLEND;
-}
-
-static void
-swr_delete_blend_state(struct pipe_context *pipe, void *blend)
-{
- FREE(blend);
-}
-
-static void
-swr_set_blend_color(struct pipe_context *pipe,
- const struct pipe_blend_color *color)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- ctx->blend_color = *color;
-
- ctx->dirty |= SWR_NEW_BLEND;
-}
-
-static void
-swr_set_stencil_ref(struct pipe_context *pipe,
- const struct pipe_stencil_ref ref)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- ctx->stencil_ref = ref;
-
- ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
-}
-
-static void *
-swr_create_depth_stencil_state(
- struct pipe_context *pipe,
- const struct pipe_depth_stencil_alpha_state *depth_stencil)
-{
- struct pipe_depth_stencil_alpha_state *state;
-
- state = (pipe_depth_stencil_alpha_state *)mem_dup(depth_stencil,
- sizeof *depth_stencil);
-
- return state;
-}
-
-static void
-swr_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- if (ctx->depth_stencil == (pipe_depth_stencil_alpha_state *)depth_stencil)
- return;
-
- ctx->depth_stencil = (pipe_depth_stencil_alpha_state *)depth_stencil;
-
- ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
-}
-
-static void
-swr_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
-{
- FREE(depth);
-}
-
-
-static void *
-swr_create_rasterizer_state(struct pipe_context *pipe,
- const struct pipe_rasterizer_state *rast)
-{
- struct pipe_rasterizer_state *state;
- state = (pipe_rasterizer_state *)mem_dup(rast, sizeof *rast);
-
- return state;
-}
-
-static void
-swr_bind_rasterizer_state(struct pipe_context *pipe, void *handle)
-{
- struct swr_context *ctx = swr_context(pipe);
- const struct pipe_rasterizer_state *rasterizer =
- (const struct pipe_rasterizer_state *)handle;
-
- if (ctx->rasterizer == (pipe_rasterizer_state *)rasterizer)
- return;
-
- ctx->rasterizer = (pipe_rasterizer_state *)rasterizer;
-
- ctx->dirty |= SWR_NEW_RASTERIZER;
-}
-
-static void
-swr_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer)
-{
- FREE(rasterizer);
-}
-
-
-static void *
-swr_create_sampler_state(struct pipe_context *pipe,
- const struct pipe_sampler_state *sampler)
-{
- struct pipe_sampler_state *state =
- (pipe_sampler_state *)mem_dup(sampler, sizeof *sampler);
-
- return state;
-}
-
-static void
-swr_bind_sampler_states(struct pipe_context *pipe,
- enum pipe_shader_type shader,
- unsigned start,
- unsigned num,
- void **samplers)
-{
- struct swr_context *ctx = swr_context(pipe);
- unsigned i;
-
- assert(shader < PIPE_SHADER_TYPES);
- assert(start + num <= ARRAY_SIZE(ctx->samplers[shader]));
-
- /* set the new samplers */
- ctx->num_samplers[shader] = num;
- for (i = 0; i < num; i++) {
- ctx->samplers[shader][start + i] = (pipe_sampler_state *)samplers[i];
- }
-
- ctx->dirty |= SWR_NEW_SAMPLER;
-}
-
-static void
-swr_delete_sampler_state(struct pipe_context *pipe, void *sampler)
-{
- FREE(sampler);
-}
-
-
-static struct pipe_sampler_view *
-swr_create_sampler_view(struct pipe_context *pipe,
- struct pipe_resource *texture,
- const struct pipe_sampler_view *templ)
-{
- struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
-
- if (view) {
- *view = *templ;
- view->reference.count = 1;
- view->texture = NULL;
- pipe_resource_reference(&view->texture, texture);
- view->context = pipe;
- }
-
- return view;
-}
-
-static void
-swr_set_sampler_views(struct pipe_context *pipe,
- enum pipe_shader_type shader,
- unsigned start,
- unsigned num,
- unsigned unbind_num_trailing_slots,
- bool take_ownership,
- struct pipe_sampler_view **views)
-{
- struct swr_context *ctx = swr_context(pipe);
- uint i;
-
- assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
-
- assert(shader < PIPE_SHADER_TYPES);
- assert(start + num <= ARRAY_SIZE(ctx->sampler_views[shader]));
-
- /* set the new sampler views */
- ctx->num_sampler_views[shader] = num;
- for (i = 0; i < num; i++) {
- if (take_ownership) {
- pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i],
- NULL);
- ctx->sampler_views[shader][start + i] = views[i];
- } else {
- pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i],
- views[i]);
- }
- }
- for (; i < num + unbind_num_trailing_slots; i++) {
- pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i],
- NULL);
- }
-
- ctx->dirty |= SWR_NEW_SAMPLER_VIEW;
-}
-
-static void
-swr_sampler_view_destroy(struct pipe_context *pipe,
- struct pipe_sampler_view *view)
-{
- pipe_resource_reference(&view->texture, NULL);
- FREE(view);
-}
-
-static void *
-swr_create_vs_state(struct pipe_context *pipe,
- const struct pipe_shader_state *vs)
-{
- struct swr_vertex_shader *swr_vs = new swr_vertex_shader;
- if (!swr_vs)
- return NULL;
-
- swr_vs->pipe.tokens = tgsi_dup_tokens(vs->tokens);
- swr_vs->pipe.stream_output = vs->stream_output;
-
- lp_build_tgsi_info(vs->tokens, &swr_vs->info);
-
- swr_vs->soState = {0};
-
- if (swr_vs->pipe.stream_output.num_outputs) {
- pipe_stream_output_info *stream_output = &swr_vs->pipe.stream_output;
-
- swr_vs->soState.soEnable = true;
- // soState.rasterizerDisable set on state dirty
- // soState.streamToRasterizer not used
-
- for (uint32_t i = 0; i < stream_output->num_outputs; i++) {
- unsigned attrib_slot = stream_output->output[i].register_index;
- attrib_slot = swr_so_adjust_attrib(attrib_slot, swr_vs);
- swr_vs->soState.streamMasks[stream_output->output[i].stream] |=
- (1 << attrib_slot);
- }
- for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
- swr_vs->soState.streamNumEntries[i] =
- _mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
- }
- }
-
- return swr_vs;
-}
-
-static void
-swr_bind_vs_state(struct pipe_context *pipe, void *vs)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- if (ctx->vs == vs)
- return;
-
- ctx->vs = (swr_vertex_shader *)vs;
- ctx->dirty |= SWR_NEW_VS;
-}
-
-static void
-swr_delete_vs_state(struct pipe_context *pipe, void *vs)
-{
- struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs;
- FREE((void *)swr_vs->pipe.tokens);
- struct swr_screen *screen = swr_screen(pipe->screen);
-
- /* Defer deletion of vs state */
- swr_fence_work_delete_vs(screen->flush_fence, swr_vs);
-}
-
-static void *
-swr_create_fs_state(struct pipe_context *pipe,
- const struct pipe_shader_state *fs)
-{
- struct swr_fragment_shader *swr_fs = new swr_fragment_shader;
- if (!swr_fs)
- return NULL;
-
- swr_fs->pipe.tokens = tgsi_dup_tokens(fs->tokens);
-
- lp_build_tgsi_info(fs->tokens, &swr_fs->info);
-
- return swr_fs;
-}
-
-
-static void
-swr_bind_fs_state(struct pipe_context *pipe, void *fs)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- if (ctx->fs == fs)
- return;
-
- ctx->fs = (swr_fragment_shader *)fs;
- ctx->dirty |= SWR_NEW_FS;
-}
-
-static void
-swr_delete_fs_state(struct pipe_context *pipe, void *fs)
-{
- struct swr_fragment_shader *swr_fs = (swr_fragment_shader *)fs;
- FREE((void *)swr_fs->pipe.tokens);
- struct swr_screen *screen = swr_screen(pipe->screen);
-
- /* Defer deleton of fs state */
- swr_fence_work_delete_fs(screen->flush_fence, swr_fs);
-}
-
-static void *
-swr_create_gs_state(struct pipe_context *pipe,
- const struct pipe_shader_state *gs)
-{
- struct swr_geometry_shader *swr_gs = new swr_geometry_shader;
- if (!swr_gs)
- return NULL;
-
- swr_gs->pipe.tokens = tgsi_dup_tokens(gs->tokens);
- lp_build_tgsi_info(gs->tokens, &swr_gs->info);
- return swr_gs;
-}
-
-static void
-swr_bind_gs_state(struct pipe_context *pipe, void *gs)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- if (ctx->gs == gs)
- return;
-
- ctx->gs = (swr_geometry_shader *)gs;
- ctx->dirty |= SWR_NEW_GS;
-}
-
-static void
-swr_delete_gs_state(struct pipe_context *pipe, void *gs)
-{
- struct swr_geometry_shader *swr_gs = (swr_geometry_shader *)gs;
- FREE((void *)swr_gs->pipe.tokens);
- struct swr_screen *screen = swr_screen(pipe->screen);
-
- /* Defer deleton of fs state */
- swr_fence_work_delete_gs(screen->flush_fence, swr_gs);
-}
-
-static void *
-swr_create_tcs_state(struct pipe_context *pipe,
- const struct pipe_shader_state *tcs)
-{
- struct swr_tess_control_shader *swr_tcs = new swr_tess_control_shader;
- if (!swr_tcs)
- return NULL;
-
- swr_tcs->pipe.tokens = tgsi_dup_tokens(tcs->tokens);
- lp_build_tgsi_info(tcs->tokens, &swr_tcs->info);
- return swr_tcs;
-}
-
-static void
-swr_bind_tcs_state(struct pipe_context *pipe, void *tcs)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- if (ctx->tcs == tcs)
- return;
-
- ctx->tcs = (swr_tess_control_shader *)tcs;
- ctx->dirty |= SWR_NEW_TCS;
- ctx->dirty |= SWR_NEW_TS;
-}
-
-static void
-swr_delete_tcs_state(struct pipe_context *pipe, void *tcs)
-{
- struct swr_tess_control_shader *swr_tcs = (swr_tess_control_shader *)tcs;
- FREE((void *)swr_tcs->pipe.tokens);
- struct swr_screen *screen = swr_screen(pipe->screen);
-
- /* Defer deleton of tcs state */
- swr_fence_work_delete_tcs(screen->flush_fence, swr_tcs);
-}
-
-static void *
-swr_create_tes_state(struct pipe_context *pipe,
- const struct pipe_shader_state *tes)
-{
- struct swr_tess_evaluation_shader *swr_tes = new swr_tess_evaluation_shader;
- if (!swr_tes)
- return NULL;
-
- swr_tes->pipe.tokens = tgsi_dup_tokens(tes->tokens);
- lp_build_tgsi_info(tes->tokens, &swr_tes->info);
- return swr_tes;
-}
-
-static void
-swr_bind_tes_state(struct pipe_context *pipe, void *tes)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- if (ctx->tes == tes)
- return;
-
- // Save current tessellator state first
- if (ctx->tes != nullptr) {
- ctx->tes->ts_state = ctx->tsState;
- }
-
- ctx->tes = (swr_tess_evaluation_shader *)tes;
-
- ctx->dirty |= SWR_NEW_TES;
- ctx->dirty |= SWR_NEW_TS;
-}
-
-static void
-swr_delete_tes_state(struct pipe_context *pipe, void *tes)
-{
- struct swr_tess_evaluation_shader *swr_tes = (swr_tess_evaluation_shader *)tes;
- FREE((void *)swr_tes->pipe.tokens);
- struct swr_screen *screen = swr_screen(pipe->screen);
-
- /* Defer deleton of tes state */
- swr_fence_work_delete_tes(screen->flush_fence, swr_tes);
-}
-
-static void
-swr_set_constant_buffer(struct pipe_context *pipe,
- enum pipe_shader_type shader,
- uint index, bool take_ownership,
- const struct pipe_constant_buffer *cb)
-{
- struct swr_context *ctx = swr_context(pipe);
- struct pipe_resource *constants = cb ? cb->buffer : NULL;
-
- assert(shader < PIPE_SHADER_TYPES);
- assert(index < ARRAY_SIZE(ctx->constants[shader]));
-
- /* note: reference counting */
- util_copy_constant_buffer(&ctx->constants[shader][index], cb, take_ownership);
-
- if (shader == PIPE_SHADER_VERTEX) {
- ctx->dirty |= SWR_NEW_VSCONSTANTS;
- } else if (shader == PIPE_SHADER_FRAGMENT) {
- ctx->dirty |= SWR_NEW_FSCONSTANTS;
- } else if (shader == PIPE_SHADER_GEOMETRY) {
- ctx->dirty |= SWR_NEW_GSCONSTANTS;
- } else if (shader == PIPE_SHADER_TESS_CTRL) {
- ctx->dirty |= SWR_NEW_TCSCONSTANTS;
- } else if (shader == PIPE_SHADER_TESS_EVAL) {
- ctx->dirty |= SWR_NEW_TESCONSTANTS;
- }
- if (cb && cb->user_buffer) {
- pipe_resource_reference(&constants, NULL);
- }
-}
-
-
-static void *
-swr_create_vertex_elements_state(struct pipe_context *pipe,
- unsigned num_elements,
- const struct pipe_vertex_element *attribs)
-{
- struct swr_vertex_element_state *velems;
- assert(num_elements <= PIPE_MAX_ATTRIBS);
- velems = new swr_vertex_element_state;
- if (velems) {
- memset((void*)&velems->fsState, 0, sizeof(velems->fsState));
- velems->fsState.bVertexIDOffsetEnable = true;
- velems->fsState.numAttribs = num_elements;
- for (unsigned i = 0; i < num_elements; i++) {
- // XXX: we should do this keyed on the VS usage info
-
- const struct util_format_description *desc =
- util_format_description((enum pipe_format)attribs[i].src_format);
-
- velems->fsState.layout[i].AlignedByteOffset = attribs[i].src_offset;
- velems->fsState.layout[i].Format =
- mesa_to_swr_format((enum pipe_format)attribs[i].src_format);
- velems->fsState.layout[i].StreamIndex =
- attribs[i].vertex_buffer_index;
- velems->fsState.layout[i].InstanceEnable =
- attribs[i].instance_divisor != 0;
- velems->fsState.layout[i].ComponentControl0 =
- desc->channel[0].type != UTIL_FORMAT_TYPE_VOID
- ? ComponentControl::StoreSrc
- : ComponentControl::Store0;
- velems->fsState.layout[i].ComponentControl1 =
- desc->channel[1].type != UTIL_FORMAT_TYPE_VOID
- ? ComponentControl::StoreSrc
- : ComponentControl::Store0;
- velems->fsState.layout[i].ComponentControl2 =
- desc->channel[2].type != UTIL_FORMAT_TYPE_VOID
- ? ComponentControl::StoreSrc
- : ComponentControl::Store0;
- velems->fsState.layout[i].ComponentControl3 =
- desc->channel[3].type != UTIL_FORMAT_TYPE_VOID
- ? ComponentControl::StoreSrc
- : ComponentControl::Store1Fp;
- velems->fsState.layout[i].ComponentPacking = ComponentEnable::XYZW;
- velems->fsState.layout[i].InstanceAdvancementState =
- attribs[i].instance_divisor;
-
- /* Calculate the pitch of each stream */
- const SWR_FORMAT_INFO &swr_desc = GetFormatInfo(
- mesa_to_swr_format((enum pipe_format)attribs[i].src_format));
- velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp;
-
- if (attribs[i].instance_divisor != 0) {
- velems->instanced_bufs |= 1U << attribs[i].vertex_buffer_index;
- uint32_t *min_instance_div =
- &velems->min_instance_div[attribs[i].vertex_buffer_index];
- if (!*min_instance_div ||
- attribs[i].instance_divisor < *min_instance_div)
- *min_instance_div = attribs[i].instance_divisor;
- }
- }
- }
-
- return velems;
-}
-
-static void
-swr_bind_vertex_elements_state(struct pipe_context *pipe, void *velems)
-{
- struct swr_context *ctx = swr_context(pipe);
- struct swr_vertex_element_state *swr_velems =
- (struct swr_vertex_element_state *)velems;
-
- ctx->velems = swr_velems;
- ctx->dirty |= SWR_NEW_VERTEX;
-}
-
-static void
-swr_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
-{
- struct swr_vertex_element_state *swr_velems =
- (struct swr_vertex_element_state *) velems;
- /* XXX Need to destroy fetch shader? */
- delete swr_velems;
-}
-
-
-static void
-swr_set_vertex_buffers(struct pipe_context *pipe,
- unsigned start_slot,
- unsigned num_elements,
- unsigned unbind_num_trailing_slots,
- bool take_ownership,
- const struct pipe_vertex_buffer *buffers)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- assert(num_elements <= PIPE_MAX_ATTRIBS);
-
- util_set_vertex_buffers_count(ctx->vertex_buffer,
- &ctx->num_vertex_buffers,
- buffers,
- start_slot,
- num_elements,
- unbind_num_trailing_slots,
- take_ownership);
-
- ctx->dirty |= SWR_NEW_VERTEX;
-}
-
-
-static void
-swr_set_polygon_stipple(struct pipe_context *pipe,
- const struct pipe_poly_stipple *stipple)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- ctx->poly_stipple.pipe = *stipple; /* struct copy */
- ctx->dirty |= SWR_NEW_STIPPLE;
-}
-
-static void
-swr_set_clip_state(struct pipe_context *pipe,
- const struct pipe_clip_state *clip)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- ctx->clip = *clip;
- /* XXX Unimplemented, but prevents crash */
-
- ctx->dirty |= SWR_NEW_CLIP;
-}
-
-
-static void
-swr_set_scissor_states(struct pipe_context *pipe,
- unsigned start_slot,
- unsigned num_scissors,
- const struct pipe_scissor_state *scissors)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- memcpy(ctx->scissors + start_slot, scissors,
- sizeof(struct pipe_scissor_state) * num_scissors);
-
- for (unsigned i = 0; i < num_scissors; i++) {
- auto idx = start_slot + i;
- ctx->swr_scissors[idx].xmin = scissors[idx].minx;
- ctx->swr_scissors[idx].xmax = scissors[idx].maxx;
- ctx->swr_scissors[idx].ymin = scissors[idx].miny;
- ctx->swr_scissors[idx].ymax = scissors[idx].maxy;
- }
- ctx->dirty |= SWR_NEW_SCISSOR;
-}
-
-static void
-swr_set_viewport_states(struct pipe_context *pipe,
- unsigned start_slot,
- unsigned num_viewports,
- const struct pipe_viewport_state *vpt)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- memcpy(ctx->viewports + start_slot, vpt, sizeof(struct pipe_viewport_state) * num_viewports);
- ctx->dirty |= SWR_NEW_VIEWPORT;
-}
-
-
-static void
-swr_set_framebuffer_state(struct pipe_context *pipe,
- const struct pipe_framebuffer_state *fb)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- bool changed = !util_framebuffer_state_equal(&ctx->framebuffer, fb);
-
- assert(fb->width <= KNOB_GUARDBAND_WIDTH);
- assert(fb->height <= KNOB_GUARDBAND_HEIGHT);
-
- if (changed) {
- util_copy_framebuffer_state(&ctx->framebuffer, fb);
-
- /* 0 and 1 both indicate no msaa. Core doesn't understand 0 samples */
- ctx->framebuffer.samples = std::max((ubyte)1, ctx->framebuffer.samples);
-
- ctx->dirty |= SWR_NEW_FRAMEBUFFER;
- }
-}
-
-
-static void
-swr_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- if (sample_mask != ctx->sample_mask) {
- ctx->sample_mask = sample_mask;
- ctx->dirty |= SWR_NEW_RASTERIZER;
- }
-}
-
-/*
- * MSAA fixed sample position table
- * used by update_derived and get_sample_position
- * (integer locations on a 16x16 grid)
- */
-static const uint8_t swr_sample_positions[][2] =
-{ /* 1x*/ { 8, 8},
- /* 2x*/ {12,12},{ 4, 4},
- /* 4x*/ { 6, 2},{14, 6},{ 2,10},{10,14},
- /* 8x*/ { 9, 5},{ 7,11},{13, 9},{ 5, 3},
- { 3,13},{ 1, 7},{11,15},{15, 1},
- /*16x*/ { 9, 9},{ 7, 5},{ 5,10},{12, 7},
- { 3, 6},{10,13},{13,11},{11, 3},
- { 6,14},{ 8, 1},{ 4, 2},{ 2,12},
- { 0, 8},{15, 4},{14,15},{ 1, 0} };
-
-static void
-swr_get_sample_position(struct pipe_context *pipe,
- unsigned sample_count, unsigned sample_index,
- float *out_value)
-{
- /* validate sample_count */
- sample_count = GetNumSamples(GetSampleCount(sample_count));
-
- const uint8_t *sample = swr_sample_positions[sample_count-1 + sample_index];
- out_value[0] = sample[0] / 16.0f;
- out_value[1] = sample[1] / 16.0f;
-}
-
-
-/*
- * Update resource in-use status
- * All resources bound to color or depth targets marked as WRITE resources.
- * VBO Vertex/index buffers and texture views marked as READ resources.
- */
-void
-swr_update_resource_status(struct pipe_context *pipe,
- const struct pipe_draw_info *p_draw_info)
-{
- struct swr_context *ctx = swr_context(pipe);
- struct pipe_framebuffer_state *fb = &ctx->framebuffer;
-
- /* colorbuffer targets */
- if (fb->nr_cbufs)
- for (uint32_t i = 0; i < fb->nr_cbufs; ++i)
- if (fb->cbufs[i])
- swr_resource_write(fb->cbufs[i]->texture);
-
- /* depth/stencil target */
- if (fb->zsbuf)
- swr_resource_write(fb->zsbuf->texture);
-
- /* VBO vertex buffers */
- for (uint32_t i = 0; i < ctx->num_vertex_buffers; i++) {
- struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
- if (!vb->is_user_buffer && vb->buffer.resource)
- swr_resource_read(vb->buffer.resource);
- }
-
- /* VBO index buffer */
- if (p_draw_info && p_draw_info->index_size) {
- if (!p_draw_info->has_user_indices)
- swr_resource_read(p_draw_info->index.resource);
- }
-
- /* transform feedback buffers */
- for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
- struct pipe_stream_output_target *target = ctx->so_targets[i];
- if (target && target->buffer)
- swr_resource_write(target->buffer);
- }
-
- /* texture sampler views */
- for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
- for (uint32_t i = 0; i < ctx->num_sampler_views[j]; i++) {
- struct pipe_sampler_view *view = ctx->sampler_views[j][i];
- if (view)
- swr_resource_read(view->texture);
- }
- }
-
- /* constant buffers */
- for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
- for (uint32_t i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
- struct pipe_constant_buffer *cb = &ctx->constants[j][i];
- if (cb->buffer)
- swr_resource_read(cb->buffer);
- }
- }
-}
-
-static void
-swr_update_texture_state(struct swr_context *ctx,
- enum pipe_shader_type shader_type,
- unsigned num_sampler_views,
- swr_jit_texture *textures)
-{
- for (unsigned i = 0; i < num_sampler_views; i++) {
- struct pipe_sampler_view *view =
- ctx->sampler_views[shader_type][i];
- struct swr_jit_texture *jit_tex = &textures[i];
-
- memset(jit_tex, 0, sizeof(*jit_tex));
- if (view) {
- struct pipe_resource *res = view->texture;
- struct swr_resource *swr_res = swr_resource(res);
- SWR_SURFACE_STATE *swr = &swr_res->swr;
- size_t *mip_offsets = swr_res->mip_offsets;
- if (swr_res->has_depth && swr_res->has_stencil &&
- !util_format_has_depth(util_format_description(view->format))) {
- swr = &swr_res->secondary;
- mip_offsets = swr_res->secondary_mip_offsets;
- }
-
- jit_tex->width = res->width0;
- jit_tex->height = res->height0;
- jit_tex->base_ptr = (uint8_t*)swr->xpBaseAddress;
- jit_tex->num_samples = swr->numSamples;
- jit_tex->sample_stride = 0;
- if (view->target != PIPE_BUFFER) {
- jit_tex->first_level = view->u.tex.first_level;
- jit_tex->last_level = view->u.tex.last_level;
- if (view->target == PIPE_TEXTURE_3D)
- jit_tex->depth = res->depth0;
- else
- jit_tex->depth =
- view->u.tex.last_layer - view->u.tex.first_layer + 1;
- jit_tex->base_ptr += view->u.tex.first_layer *
- swr->qpitch * swr->pitch;
- } else {
- unsigned view_blocksize = util_format_get_blocksize(view->format);
- jit_tex->base_ptr += view->u.buf.offset;
- jit_tex->width = view->u.buf.size / view_blocksize;
- jit_tex->depth = 1;
- }
-
- for (unsigned level = jit_tex->first_level;
- level <= jit_tex->last_level;
- level++) {
- jit_tex->row_stride[level] = swr->pitch;
- jit_tex->img_stride[level] = swr->qpitch * swr->pitch;
- jit_tex->mip_offsets[level] = mip_offsets[level];
- }
- }
- }
-}
-
-static void
-swr_update_sampler_state(struct swr_context *ctx,
- enum pipe_shader_type shader_type,
- unsigned num_samplers,
- swr_jit_sampler *samplers)
-{
- for (unsigned i = 0; i < num_samplers; i++) {
- const struct pipe_sampler_state *sampler =
- ctx->samplers[shader_type][i];
-
- if (sampler) {
- samplers[i].min_lod = sampler->min_lod;
- samplers[i].max_lod = sampler->max_lod;
- samplers[i].lod_bias = sampler->lod_bias;
- COPY_4V(samplers[i].border_color, sampler->border_color.f);
- }
- }
-}
-
-static void
-swr_update_constants(struct swr_context *ctx, enum pipe_shader_type shaderType)
-{
- swr_draw_context *pDC = &ctx->swrDC;
-
- const float **constant;
- uint32_t *num_constants;
- struct swr_scratch_space *scratch;
-
- switch (shaderType) {
- case PIPE_SHADER_VERTEX:
- constant = pDC->constantVS;
- num_constants = pDC->num_constantsVS;
- scratch = &ctx->scratch->vs_constants;
- break;
- case PIPE_SHADER_FRAGMENT:
- constant = pDC->constantFS;
- num_constants = pDC->num_constantsFS;
- scratch = &ctx->scratch->fs_constants;
- break;
- case PIPE_SHADER_GEOMETRY:
- constant = pDC->constantGS;
- num_constants = pDC->num_constantsGS;
- scratch = &ctx->scratch->gs_constants;
- break;
- case PIPE_SHADER_TESS_CTRL:
- constant = pDC->constantTCS;
- num_constants = pDC->num_constantsTCS;
- scratch = &ctx->scratch->tcs_constants;
- break;
- case PIPE_SHADER_TESS_EVAL:
- constant = pDC->constantTES;
- num_constants = pDC->num_constantsTES;
- scratch = &ctx->scratch->tes_constants;
- break;
- default:
- assert(0 && "Unsupported shader type constants");
- return;
- }
-
- for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
- const pipe_constant_buffer *cb = &ctx->constants[shaderType][i];
- num_constants[i] = cb->buffer_size;
- if (cb->buffer) {
- constant[i] =
- (const float *)(swr_resource_data(cb->buffer) +
- cb->buffer_offset);
- } else {
- /* Need to copy these constants to scratch space */
- if (cb->user_buffer && cb->buffer_size) {
- const void *ptr =
- ((const uint8_t *)cb->user_buffer + cb->buffer_offset);
- uint32_t size = AlignUp(cb->buffer_size, 4);
- ptr = swr_copy_to_scratch_space(ctx, scratch, ptr, size);
- constant[i] = (const float *)ptr;
- }
- }
- }
-}
-
-static bool
-swr_change_rt(struct swr_context *ctx,
- unsigned attachment,
- const struct pipe_surface *sf)
-{
- swr_draw_context *pDC = &ctx->swrDC;
- struct SWR_SURFACE_STATE *rt = &pDC->renderTargets[attachment];
-
- /* Do nothing if the render target hasn't changed */
- if ((!sf || !sf->texture) && (void*)(rt->xpBaseAddress) == nullptr)
- return false;
-
- /* Deal with disabling RT up front */
- if (!sf || !sf->texture) {
- /* If detaching attachment, mark tiles as RESOLVED so core
- * won't try to load from non-existent target. */
- swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_RESOLVED);
- *rt = {0};
- return true;
- }
-
- const struct swr_resource *swr = swr_resource(sf->texture);
- const SWR_SURFACE_STATE *swr_surface = &swr->swr;
- SWR_FORMAT fmt = mesa_to_swr_format(sf->format);
-
- if (attachment == SWR_ATTACHMENT_STENCIL && swr->secondary.xpBaseAddress) {
- swr_surface = &swr->secondary;
- fmt = swr_surface->format;
- }
-
- if (rt->xpBaseAddress == swr_surface->xpBaseAddress &&
- rt->format == fmt &&
- rt->lod == sf->u.tex.level &&
- rt->arrayIndex == sf->u.tex.first_layer)
- return false;
-
- bool need_fence = false;
-
- /* StoreTile for changed target */
- if (rt->xpBaseAddress) {
- /* If changing attachment to a new target, mark tiles as
- * INVALID so they are reloaded from surface. */
- swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_INVALID);
- need_fence = true;
- } else {
- /* if no previous attachment, invalidate tiles that may be marked
- * RESOLVED because of an old attachment */
- swr_invalidate_render_target(&ctx->pipe, attachment, sf->width, sf->height);
- /* no need to set fence here */
- }
-
- /* Make new attachment */
- *rt = *swr_surface;
- rt->format = fmt;
- rt->lod = sf->u.tex.level;
- rt->arrayIndex = sf->u.tex.first_layer;
-
- return need_fence;
-}
-
-/*
- * for cases where resources are shared between contexts, invalidate
- * this ctx's resource. so it can be fetched fresh. Old ctx's resource
- * is already stored during a flush
- */
-static inline void
-swr_invalidate_buffers_after_ctx_change(struct pipe_context *pipe)
-{
- struct swr_context *ctx = swr_context(pipe);
-
- for (uint32_t i = 0; i < ctx->framebuffer.nr_cbufs; i++) {
- struct pipe_surface *cb = ctx->framebuffer.cbufs[i];
- if (cb) {
- struct swr_resource *res = swr_resource(cb->texture);
- if (res->curr_pipe != pipe) {
- /* if curr_pipe is NULL (first use), status should not be WRITE */
- assert(res->curr_pipe || !(res->status & SWR_RESOURCE_WRITE));
- if (res->status & SWR_RESOURCE_WRITE) {
- swr_invalidate_render_target(pipe, i, cb->width, cb->height);
- }
- }
- res->curr_pipe = pipe;
- }
- }
- if (ctx->framebuffer.zsbuf) {
- struct pipe_surface *zb = ctx->framebuffer.zsbuf;
- if (zb) {
- struct swr_resource *res = swr_resource(zb->texture);
- if (res->curr_pipe != pipe) {
- /* if curr_pipe is NULL (first use), status should not be WRITE */
- assert(res->curr_pipe || !(res->status & SWR_RESOURCE_WRITE));
- if (res->status & SWR_RESOURCE_WRITE) {
- swr_invalidate_render_target(pipe, SWR_ATTACHMENT_DEPTH, zb->width, zb->height);
- swr_invalidate_render_target(pipe, SWR_ATTACHMENT_STENCIL, zb->width, zb->height);
- }
- }
- res->curr_pipe = pipe;
- }
- }
-}
-
-/*
- * Compute which part of user vertex buffer \p i a draw touches.
- *
- *   totelems - out: one past the highest element referenced
- *   base     - out: byte offset of the first referenced element
- *   size     - out: number of bytes covering the referenced elements
- *
- * Three cases are handled: per-instance buffers, per-vertex buffers with a
- * non-zero stride, and zero-stride buffers (one constant element).
- */
-static inline void
-swr_user_vbuf_range(const struct pipe_draw_info *info,
-                    const struct swr_vertex_element_state *velems,
-                    const struct pipe_vertex_buffer *vb,
-                    uint32_t i,
-                    uint32_t *totelems,
-                    uint32_t *base,
-                    uint32_t *size,
-                    int index_bias)
-{
-   /* FIXME: The size is too large - we don't access the full extra stride. */
-   unsigned bytes_per_elem = vb->stride + velems->stream_pitch[i];
-   unsigned count;
-
-   if (velems->instanced_bufs & (1U << i)) {
-      /* per-instance data */
-      count = info->instance_count / velems->min_instance_div[i] + 1;
-      *totelems = info->start_instance + count;
-      *base = info->start_instance * vb->stride;
-      *size = count * bytes_per_elem;
-      return;
-   }
-
-   if (!vb->stride) {
-      /* zero stride: a single element is read for every vertex */
-      *totelems = 1;
-      *base = 0;
-      *size = velems->stream_pitch[i];
-      return;
-   }
-
-   /* per-vertex data; the index bias only applies to indexed draws */
-   const int bias = info->index_size ? index_bias : 0;
-   count = info->max_index - info->min_index + 1;
-   *totelems = (info->max_index + bias) + 1;
-   *base = (info->min_index + bias) * vb->stride;
-   *size = count * bytes_per_elem;
-}
-
-/*
- * Copy the bound polygon stipple pattern into the draw context
- * (ctx->swrDC.polyStipple).  Source and destination are asserted to have
- * the same size.
- */
-static void
-swr_update_poly_stipple(struct swr_context *ctx)
-{
-   const size_t stipple_bytes = sizeof(ctx->poly_stipple.pipe.stipple);
-
-   assert(stipple_bytes == sizeof(ctx->swrDC.polyStipple));
-   memcpy(ctx->swrDC.polyStipple, ctx->poly_stipple.pipe.stipple, stipple_bytes);
-}
-
-
-/*
- * Return the TGSI shader info of the last bound pre-rasterization stage.
- * Stages are checked in the order GS, TES, TCS; the vertex shader is the
- * fallback when none of them is bound.
- */
-static struct tgsi_shader_info *
-swr_get_last_fe(const struct swr_context *ctx)
-{
-   if (ctx->gs)
-      return &ctx->gs->info.base;
-
-   if (ctx->tes)
-      return &ctx->tes->info.base;
-
-   if (ctx->tcs)
-      return &ctx->tcs->info.base;
-
-   return &ctx->vs->info.base;
-}
-
-
-/**
- * Validate every piece of derived state flagged in ctx->dirty and hand it
- * to the SWR core through the ctx->api entry points.
- *
- * \param pipe         context being validated; also recorded as
- *                     screen->pipe.
- * \param p_draw_info  draw being prepared, or NULL when called from
- *                     swr_clear.  With NULL only render targets, raster
- *                     state, viewports and the backend state are
- *                     validated; all other dirty bits are deferred to the
- *                     next call.
- * \param draw         start/count/index_bias of the draw.  Only
- *                     dereferenced while validating vertex and index
- *                     buffers, which never happens when p_draw_info is
- *                     NULL because the dirty mask is cleared first.
- *
- * On return ctx->dirty holds only the bits that must be re-validated on
- * the next draw (deferred bits, user buffers, SWR_BLOCK_CLIENT_DRAW).
- */
-void
-swr_update_derived(struct pipe_context *pipe,
-                   const struct pipe_draw_info *p_draw_info,
-                   const struct pipe_draw_start_count_bias *draw)
-{
-   struct swr_context *ctx = swr_context(pipe);
-   struct swr_screen *screen = swr_screen(pipe->screen);
-
-   /* When called from swr_clear (p_draw_info = null), set any null
-    * state-objects to the dummy state objects to prevent nullptr dereference
-    * in validation below.
-    *
-    * Important that this remains static for zero initialization.  These
-    * aren't meant to be proper state objects, just empty structs.  They will
-    * not be written to.
-    *
-    * Shaders can't be part of the union since they contain std::unordered_map
-    */
-   static struct {
-      union {
-         struct pipe_rasterizer_state rasterizer;
-         struct pipe_depth_stencil_alpha_state depth_stencil;
-         struct swr_blend_state blend;
-      } state;
-      struct swr_vertex_shader vs;
-      struct swr_fragment_shader fs;
-   } swr_dummy;
-
-   if (!p_draw_info) {
-      if (!ctx->rasterizer)
-         ctx->rasterizer = &swr_dummy.state.rasterizer;
-      if (!ctx->depth_stencil)
-         ctx->depth_stencil = &swr_dummy.state.depth_stencil;
-      if (!ctx->blend)
-         ctx->blend = &swr_dummy.state.blend;
-      if (!ctx->vs)
-         ctx->vs = &swr_dummy.vs;
-      if (!ctx->fs)
-         ctx->fs = &swr_dummy.fs;
-   }
-
-   /* Update screen->pipe to current pipe context. */
-   screen->pipe = pipe;
-
-   /* Any state that requires dirty flags to be re-triggered sets this mask */
-   /* For example, user_buffer vertex and index buffers. */
-   unsigned post_update_dirty_flags = 0;
-
-   /* bring resources that changed context up-to-date */
-   swr_invalidate_buffers_after_ctx_change(pipe);
-
-   /* Render Targets */
-   if (ctx->dirty & SWR_NEW_FRAMEBUFFER) {
-      struct pipe_framebuffer_state *fb = &ctx->framebuffer;
-      const struct util_format_description *desc = NULL;
-      bool need_fence = false;
-
-      /* colorbuffer targets */
-      if (fb->nr_cbufs) {
-         for (unsigned i = 0; i < fb->nr_cbufs; ++i)
-            need_fence |= swr_change_rt(
-               ctx, SWR_ATTACHMENT_COLOR0 + i, fb->cbufs[i]);
-      }
-      /* unbind every color attachment beyond the bound ones */
-      for (unsigned i = fb->nr_cbufs; i < SWR_NUM_RENDERTARGETS; ++i)
-         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_COLOR0 + i, NULL);
-
-      /* depth/stencil target */
-      if (fb->zsbuf)
-         desc = util_format_description(fb->zsbuf->format);
-      if (fb->zsbuf && util_format_has_depth(desc))
-         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, fb->zsbuf);
-      else
-         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, NULL);
-
-      if (fb->zsbuf && util_format_has_stencil(desc))
-         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, fb->zsbuf);
-      else
-         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, NULL);
-
-      /* This fence ensures any attachment changes are resolved before the
-       * next draw */
-      if (need_fence)
-         swr_fence_submit(ctx, screen->flush_fence);
-   }
-
-   /* Raster state */
-   if (ctx->dirty & (SWR_NEW_RASTERIZER |
-                     SWR_NEW_VS | // clipping
-                     SWR_NEW_TES |
-                     SWR_NEW_TCS |
-                     SWR_NEW_FRAMEBUFFER)) {
-      pipe_rasterizer_state *rasterizer = ctx->rasterizer;
-      pipe_framebuffer_state *fb = &ctx->framebuffer;
-
-      SWR_RASTSTATE *rastState = &ctx->derived.rastState;
-      rastState->cullMode = swr_convert_cull_mode(rasterizer->cull_face);
-      rastState->frontWinding = rasterizer->front_ccw
-         ? SWR_FRONTWINDING_CCW
-         : SWR_FRONTWINDING_CW;
-      rastState->scissorEnable = rasterizer->scissor;
-      /* non-positive point sizes and line widths fall back to 1.0 */
-      rastState->pointSize = rasterizer->point_size > 0.0f
-         ? rasterizer->point_size
-         : 1.0f;
-      rastState->lineWidth = rasterizer->line_width > 0.0f
-         ? rasterizer->line_width
-         : 1.0f;
-
-      rastState->pointParam = rasterizer->point_size_per_vertex;
-
-      rastState->pointSpriteEnable = rasterizer->sprite_coord_enable;
-      rastState->pointSpriteTopOrigin =
-         rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT;
-
-      /* If SWR_MSAA_FORCE_ENABLE is set, turn msaa on.
-       *
-       * The sample count is taken from color buffer 0, so forcing is only
-       * possible when that buffer is actually bound; without this guard a
-       * framebuffer with no (or a NULL) first color buffer would be
-       * dereferenced through a NULL pointer.
-       *
-       * NOTE(review): this writes to the bound rasterizer state object
-       * and to ctx->framebuffer. */
-      if (screen->msaa_force_enable && !rasterizer->multisample &&
-          fb->nr_cbufs && fb->cbufs[0]) {
-         /* Force enable and use the value the surface was created with */
-         rasterizer->multisample = true;
-         fb->samples = swr_resource(fb->cbufs[0]->texture)->swr.numSamples;
-         fprintf(stderr,"msaa force enable: %d samples\n", fb->samples);
-      }
-
-      rastState->sampleCount = GetSampleCount(fb->samples);
-      rastState->forcedSampleCount = false;
-      rastState->bIsCenterPattern = !rasterizer->multisample;
-      rastState->pixelLocation = SWR_PIXEL_LOCATION_CENTER;
-
-      /* Only initialize sample positions if msaa is enabled */
-      if (rasterizer->multisample) {
-         for (uint32_t i = 0; i < fb->samples; i++) {
-            const uint8_t *sample = swr_sample_positions[fb->samples-1 + i];
-            rastState->samplePositions.SetXi(i, sample[0] << 4);
-            rastState->samplePositions.SetYi(i, sample[1] << 4);
-            rastState->samplePositions.SetX (i, sample[0] / 16.0f);
-            rastState->samplePositions.SetY (i, sample[1] / 16.0f);
-         }
-         rastState->samplePositions.PrecalcSampleData(fb->samples);
-      }
-
-      /* polygon offset is selected by the front-face fill mode */
-      bool do_offset = false;
-      switch (rasterizer->fill_front) {
-      case PIPE_POLYGON_MODE_FILL:
-         do_offset = rasterizer->offset_tri;
-         break;
-      case PIPE_POLYGON_MODE_LINE:
-         do_offset = rasterizer->offset_line;
-         break;
-      case PIPE_POLYGON_MODE_POINT:
-         do_offset = rasterizer->offset_point;
-         break;
-      }
-
-      if (do_offset) {
-         rastState->depthBias = rasterizer->offset_units;
-         rastState->slopeScaledDepthBias = rasterizer->offset_scale;
-         rastState->depthBiasClamp = rasterizer->offset_clamp;
-      } else {
-         rastState->depthBias = 0;
-         rastState->slopeScaledDepthBias = 0;
-         rastState->depthBiasClamp = 0;
-      }
-
-      /* translate polygon mode, at least for the front==back case */
-      rastState->fillMode = swr_convert_fill_mode(rasterizer->fill_front);
-
-      /* depthFormat is left untouched when no depth buffer is bound */
-      struct pipe_surface *zb = fb->zsbuf;
-      if (zb && swr_resource(zb->texture)->has_depth)
-         rastState->depthFormat = swr_resource(zb->texture)->swr.format;
-
-      rastState->depthClipEnable = rasterizer->depth_clip_near;
-      rastState->clipEnable = rasterizer->depth_clip_near | rasterizer->depth_clip_far;
-      rastState->clipHalfZ = rasterizer->clip_halfz;
-
-      ctx->api.pfnSwrSetRastState(ctx->swrContext, rastState);
-   }
-
-   /* Viewport */
-   if (ctx->dirty & (SWR_NEW_VIEWPORT | SWR_NEW_FRAMEBUFFER
-                     | SWR_NEW_RASTERIZER)) {
-      pipe_viewport_state *state = &ctx->viewports[0];
-      pipe_framebuffer_state *fb = &ctx->framebuffer;
-      pipe_rasterizer_state *rasterizer = ctx->rasterizer;
-
-      SWR_VIEWPORT *vp = &ctx->derived.vp[0];
-      SWR_VIEWPORT_MATRICES *vpm = &ctx->derived.vpm;
-
-      /* vp and state advance together with i at the end of each pass */
-      for (unsigned i = 0; i < KNOB_NUM_VIEWPORTS_SCISSORS; i++) {
-         vp->x = state->translate[0] - state->scale[0];
-         vp->width = 2 * state->scale[0];
-         vp->y = state->translate[1] - fabs(state->scale[1]);
-         vp->height = 2 * fabs(state->scale[1]);
-         util_viewport_zmin_zmax(state, rasterizer->clip_halfz,
-                                 &vp->minZ, &vp->maxZ);
-
-         if (rasterizer->depth_clip_near) {
-            vp->minZ = 0.0f;
-         }
-
-         if (rasterizer->depth_clip_far) {
-            vp->maxZ = 1.0f;
-         }
-
-         vpm->m00[i] = state->scale[0];
-         vpm->m11[i] = state->scale[1];
-         vpm->m22[i] = state->scale[2];
-         vpm->m30[i] = state->translate[0];
-         vpm->m31[i] = state->translate[1];
-         vpm->m32[i] = state->translate[2];
-
-         /* Now that the matrix is calculated, clip the view coords to screen
-          * size.  OpenGL allows for -ve x,y in the viewport. */
-         if (vp->x < 0.0f) {
-            vp->width += vp->x;
-            vp->x = 0.0f;
-         }
-         if (vp->y < 0.0f) {
-            vp->height += vp->y;
-            vp->y = 0.0f;
-         }
-         vp->width = std::min(vp->width, (float) fb->width - vp->x);
-         vp->height = std::min(vp->height, (float) fb->height - vp->y);
-
-         vp++;
-         state++;
-      }
-      ctx->api.pfnSwrSetViewports(ctx->swrContext, KNOB_NUM_VIEWPORTS_SCISSORS,
-                                  &ctx->derived.vp[0], &ctx->derived.vpm);
-   }
-
-   /* When called from swr_clear (p_draw_info = null), render targets,
-    * rasterState and viewports (dependent on render targets) are the only
-    * necessary validation.  Defer remaining validation by setting
-    * post_update_dirty_flags and clear all dirty flags.  BackendState is
-    * still unconditionally validated below */
-   if (!p_draw_info) {
-      post_update_dirty_flags = ctx->dirty & ~(SWR_NEW_FRAMEBUFFER |
-                                               SWR_NEW_RASTERIZER |
-                                               SWR_NEW_VIEWPORT);
-      ctx->dirty = 0;
-   }
-
-   /* Scissor */
-   if (ctx->dirty & SWR_NEW_SCISSOR) {
-      ctx->api.pfnSwrSetScissorRects(ctx->swrContext, KNOB_NUM_VIEWPORTS_SCISSORS, ctx->swr_scissors);
-   }
-
-   /* Set vertex & index buffers */
-   if (ctx->dirty & SWR_NEW_VERTEX) {
-      /* p_draw_info is non-NULL here: the clear path zeroed ctx->dirty */
-      const struct pipe_draw_info &info = *p_draw_info;
-
-      /* vertex buffers */
-      SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
-      for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
-         uint32_t size = 0, pitch = 0, elems = 0, partial_inbounds = 0;
-         uint32_t min_vertex_index = 0;
-         const uint8_t *p_data;
-         struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
-
-         pitch = vb->stride;
-         if (vb->is_user_buffer) {
-            /* Client buffer
-             * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
-             * revalidate on each draw */
-            post_update_dirty_flags |= SWR_NEW_VERTEX;
-
-            uint32_t base;
-            swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size, draw->index_bias);
-            partial_inbounds = 0;
-            min_vertex_index = info.min_index + (info.index_size ? draw->index_bias : 0);
-
-            size = AlignUp(size, 4);
-            /* If size of client memory copy is too large, don't copy.  The
-             * draw will access user-buffer directly and then block.  This is
-             * faster than queuing many large client draws. */
-            if (size >= screen->client_copy_limit) {
-               post_update_dirty_flags |= SWR_BLOCK_CLIENT_DRAW;
-               p_data = (const uint8_t *) vb->buffer.user;
-            } else {
-               /* Copy only needed vertices to scratch space */
-               const void *ptr = (const uint8_t *) vb->buffer.user + base;
-               ptr = (uint8_t *)swr_copy_to_scratch_space(
-                  ctx, &ctx->scratch->vertex_buffer, ptr, size);
-               /* rebase so that offset 'base' maps onto the scratch copy */
-               p_data = (const uint8_t *)ptr - base;
-            }
-         } else if (vb->buffer.resource) {
-            /* VBO */
-            if (!pitch) {
-               /* If pitch=0 (ie vb->stride), buffer contains a single
-                * constant attribute.  Use the stream_pitch which was
-                * calculated during creation of vertex_elements_state for the
-                * size of the attribute. */
-               size = ctx->velems->stream_pitch[i];
-               elems = 1;
-               partial_inbounds = 0;
-               min_vertex_index = 0;
-            } else {
-               /* size is based on buffer->width0 rather than info.max_index
-                * to prevent having to validate VBO on each draw. */
-               size = vb->buffer.resource->width0;
-               elems = size / pitch;
-               partial_inbounds = size % pitch;
-               min_vertex_index = 0;
-            }
-
-            p_data = swr_resource_data(vb->buffer.resource) + vb->buffer_offset;
-         } else
-            p_data = NULL;
-
-         swrVertexBuffers[i] = {0};
-         swrVertexBuffers[i].index = i;
-         swrVertexBuffers[i].pitch = pitch;
-         swrVertexBuffers[i].xpData = (gfxptr_t) p_data;
-         swrVertexBuffers[i].size = size;
-         swrVertexBuffers[i].minVertex = min_vertex_index;
-         swrVertexBuffers[i].maxVertex = elems;
-         swrVertexBuffers[i].partialInboundsSize = partial_inbounds;
-      }
-
-      ctx->api.pfnSwrSetVertexBuffers(
-         ctx->swrContext, ctx->num_vertex_buffers, swrVertexBuffers);
-
-      /* index buffer, if required (info passed in by swr_draw_vbo) */
-      SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */
-      if (info.index_size) {
-         const uint8_t *p_data;
-         uint32_t size, pitch;
-
-         pitch = info.index_size ? info.index_size : sizeof(uint32_t);
-         index_type = swr_convert_index_type(pitch);
-
-         if (!info.has_user_indices) {
-            /* VBO
-             * size is based on buffer->width0 rather than info.count
-             * to prevent having to validate VBO on each draw */
-            size = info.index.resource->width0;
-            p_data = swr_resource_data(info.index.resource);
-         } else {
-            /* Client buffer
-             * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
-             * revalidate on each draw */
-            post_update_dirty_flags |= SWR_NEW_VERTEX;
-
-            size = draw->count * pitch;
-
-            size = AlignUp(size, 4);
-            /* If size of client memory copy is too large, don't copy.  The
-             * draw will access user-buffer directly and then block.  This is
-             * faster than queuing many large client draws. */
-            if (size >= screen->client_copy_limit) {
-               post_update_dirty_flags |= SWR_BLOCK_CLIENT_DRAW;
-               p_data = (const uint8_t *) info.index.user +
-                        draw->start * info.index_size;
-            } else {
-               /* Copy indices to scratch space */
-               const void *ptr = (char*)info.index.user +
-                                 draw->start * info.index_size;
-               ptr = swr_copy_to_scratch_space(
-                  ctx, &ctx->scratch->index_buffer, ptr, size);
-               p_data = (const uint8_t *)ptr;
-            }
-         }
-
-         SWR_INDEX_BUFFER_STATE swrIndexBuffer;
-         swrIndexBuffer.format = swr_convert_index_type(info.index_size);
-         swrIndexBuffer.xpIndices = (gfxptr_t) p_data;
-         swrIndexBuffer.size = size;
-
-         ctx->api.pfnSwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer);
-      }
-
-      /* a changed index type clears the cached fsFunc of the vertex
-       * elements */
-      struct swr_vertex_element_state *velems = ctx->velems;
-      if (velems && velems->fsState.indexType != index_type) {
-         velems->fsFunc = NULL;
-         velems->fsState.indexType = index_type;
-      }
-   }
-
-   /* GeometryShader */
-   if (ctx->dirty & (SWR_NEW_GS |
-                     SWR_NEW_VS |
-                     SWR_NEW_TCS |
-                     SWR_NEW_TES |
-                     SWR_NEW_SAMPLER |
-                     SWR_NEW_SAMPLER_VIEW)) {
-      if (ctx->gs) {
-         /* reuse an already compiled variant for this key if there is one */
-         swr_jit_gs_key key;
-         swr_generate_gs_key(key, ctx, ctx->gs);
-         auto search = ctx->gs->map.find(key);
-         PFN_GS_FUNC func;
-         if (search != ctx->gs->map.end()) {
-            func = search->second->shader;
-         } else {
-            func = swr_compile_gs(ctx, key);
-         }
-         ctx->api.pfnSwrSetGsFunc(ctx->swrContext, func);
-
-         /* JIT sampler state */
-         if (ctx->dirty & SWR_NEW_SAMPLER) {
-            swr_update_sampler_state(ctx,
-                                     PIPE_SHADER_GEOMETRY,
-                                     key.nr_samplers,
-                                     ctx->swrDC.samplersGS);
-         }
-
-         /* JIT sampler view state */
-         if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
-            swr_update_texture_state(ctx,
-                                     PIPE_SHADER_GEOMETRY,
-                                     key.nr_sampler_views,
-                                     ctx->swrDC.texturesGS);
-         }
-
-         ctx->api.pfnSwrSetGsState(ctx->swrContext, &ctx->gs->gsState);
-      } else {
-         SWR_GS_STATE state = { 0 };
-         ctx->api.pfnSwrSetGsState(ctx->swrContext, &state);
-         ctx->api.pfnSwrSetGsFunc(ctx->swrContext, NULL);
-      }
-   }
-
-   // We may need to restore tessellation state
-   // This restored state may be however overwritten
-   // during shader compilation
-   if (ctx->dirty & SWR_NEW_TS) {
-      if (ctx->tes != nullptr) {
-         ctx->tsState = ctx->tes->ts_state;
-         ctx->api.pfnSwrSetTsState(ctx->swrContext, &ctx->tsState);
-      } else {
-         SWR_TS_STATE state = { 0 };
-         ctx->api.pfnSwrSetTsState(ctx->swrContext, &state);
-      }
-   }
-
-   // Tessellation Evaluation Shader
-   // Compile TES first, because TCS is optional
-   if (ctx->dirty & (SWR_NEW_GS |
-                     SWR_NEW_VS |
-                     SWR_NEW_TCS |
-                     SWR_NEW_TES |
-                     SWR_NEW_SAMPLER |
-                     SWR_NEW_SAMPLER_VIEW)) {
-      if (ctx->tes) {
-         swr_jit_tes_key key;
-         swr_generate_tes_key(key, ctx, ctx->tes);
-
-         auto search = ctx->tes->map.find(key);
-         PFN_TES_FUNC func;
-         if (search != ctx->tes->map.end()) {
-            func = search->second->shader;
-         } else {
-            func = swr_compile_tes(ctx, key);
-         }
-
-         ctx->api.pfnSwrSetDsFunc(ctx->swrContext, func);
-
-         /* JIT sampler state */
-         if (ctx->dirty & SWR_NEW_SAMPLER) {
-            swr_update_sampler_state(ctx,
-                                     PIPE_SHADER_TESS_EVAL,
-                                     key.nr_samplers,
-                                     ctx->swrDC.samplersTES);
-         }
-
-         /* JIT sampler view state */
-         if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
-            swr_update_texture_state(ctx,
-                                     PIPE_SHADER_TESS_EVAL,
-                                     key.nr_sampler_views,
-                                     ctx->swrDC.texturesTES);
-         }
-
-         // Update tessellation state in case it's been updated
-         ctx->api.pfnSwrSetTsState(ctx->swrContext, &ctx->tsState);
-      } else {
-         ctx->api.pfnSwrSetDsFunc(ctx->swrContext, NULL);
-      }
-   }
-
-   /* Tessellation Control Shader */
-   if (ctx->dirty & (SWR_NEW_GS |
-                     SWR_NEW_VS |
-                     SWR_NEW_TCS |
-                     SWR_NEW_TES |
-                     SWR_NEW_SAMPLER |
-                     SWR_NEW_SAMPLER_VIEW)) {
-      if (ctx->tcs) {
-         ctx->tcs->vertices_per_patch = ctx->patch_vertices;
-
-         swr_jit_tcs_key key;
-         swr_generate_tcs_key(key, ctx, ctx->tcs);
-
-         auto search = ctx->tcs->map.find(key);
-         PFN_TCS_FUNC func;
-         if (search != ctx->tcs->map.end()) {
-            func = search->second->shader;
-         } else {
-            func = swr_compile_tcs(ctx, key);
-         }
-
-         ctx->api.pfnSwrSetHsFunc(ctx->swrContext, func);
-
-         /* JIT sampler state */
-         if (ctx->dirty & SWR_NEW_SAMPLER) {
-            swr_update_sampler_state(ctx,
-                                     PIPE_SHADER_TESS_CTRL,
-                                     key.nr_samplers,
-                                     ctx->swrDC.samplersTCS);
-         }
-
-         /* JIT sampler view state */
-         if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
-            swr_update_texture_state(ctx,
-                                     PIPE_SHADER_TESS_CTRL,
-                                     key.nr_sampler_views,
-                                     ctx->swrDC.texturesTCS);
-         }
-
-         // Update tessellation state in case it's been updated
-         ctx->api.pfnSwrSetTsState(ctx->swrContext, &ctx->tsState);
-      } else {
-         ctx->api.pfnSwrSetHsFunc(ctx->swrContext, NULL);
-      }
-   }
-
-   /* VertexShader */
-   if (ctx->dirty
-       & (SWR_NEW_VS | SWR_NEW_RASTERIZER | // for clip planes
-          SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
-      swr_jit_vs_key key;
-      swr_generate_vs_key(key, ctx, ctx->vs);
-      auto search = ctx->vs->map.find(key);
-      PFN_VERTEX_FUNC func;
-      if (search != ctx->vs->map.end()) {
-         func = search->second->shader;
-      } else {
-         func = swr_compile_vs(ctx, key);
-      }
-      ctx->api.pfnSwrSetVertexFunc(ctx->swrContext, func);
-
-      /* JIT sampler state */
-      if (ctx->dirty & SWR_NEW_SAMPLER) {
-         swr_update_sampler_state(
-            ctx, PIPE_SHADER_VERTEX, key.nr_samplers, ctx->swrDC.samplersVS);
-      }
-
-      /* JIT sampler view state */
-      if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
-         swr_update_texture_state(ctx,
-                                  PIPE_SHADER_VERTEX,
-                                  key.nr_sampler_views,
-                                  ctx->swrDC.texturesVS);
-      }
-   }
-
-   /* work around the fact that poly stipple also affects lines */
-   /* and points, since we rasterize them as triangles, too */
-   /* Has to be before fragment shader, since it sets SWR_NEW_FS */
-   if (p_draw_info) {
-      bool new_prim_is_poly =
-         (u_reduced_prim(p_draw_info->mode) == PIPE_PRIM_TRIANGLES) &&
-         (ctx->derived.rastState.fillMode == SWR_FILLMODE_SOLID);
-      if (new_prim_is_poly != ctx->poly_stipple.prim_is_poly) {
-         ctx->dirty |= SWR_NEW_FS;
-         ctx->poly_stipple.prim_is_poly = new_prim_is_poly;
-      }
-   }
-
-   /* FragmentShader */
-   if (ctx->dirty & (SWR_NEW_FS |
-                     SWR_NEW_VS |
-                     SWR_NEW_GS |
-                     SWR_NEW_TES |
-                     SWR_NEW_TCS |
-                     SWR_NEW_RASTERIZER |
-                     SWR_NEW_SAMPLER |
-                     SWR_NEW_SAMPLER_VIEW |
-                     SWR_NEW_FRAMEBUFFER)) {
-      swr_jit_fs_key key;
-      swr_generate_fs_key(key, ctx, ctx->fs);
-      auto search = ctx->fs->map.find(key);
-      PFN_PIXEL_KERNEL func;
-      if (search != ctx->fs->map.end()) {
-         func = search->second->shader;
-      } else {
-         func = swr_compile_fs(ctx, key);
-      }
-      SWR_PS_STATE psState = {0};
-      psState.pfnPixelShader = func;
-      psState.killsPixel = ctx->fs->info.base.uses_kill;
-      psState.inputCoverage = SWR_INPUT_COVERAGE_NORMAL;
-      psState.writesODepth = ctx->fs->info.base.writes_z;
-      psState.usesSourceDepth = ctx->fs->info.base.reads_z;
-      psState.shadingRate = SWR_SHADING_RATE_PIXEL;
-      /* one bit per bound color buffer */
-      psState.renderTargetMask = (1 << ctx->framebuffer.nr_cbufs) - 1;
-      psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE;
-      uint32_t barycentricsMask = 0;
-#if 0
-      // when we switch to mesa-master
-      if (ctx->fs->info.base.uses_persp_center ||
-          ctx->fs->info.base.uses_linear_center)
-         barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
-      if (ctx->fs->info.base.uses_persp_centroid ||
-          ctx->fs->info.base.uses_linear_centroid)
-         barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
-      if (ctx->fs->info.base.uses_persp_sample ||
-          ctx->fs->info.base.uses_linear_sample)
-         barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
-#else
-      for (unsigned i = 0; i < ctx->fs->info.base.num_inputs; i++) {
-         switch (ctx->fs->info.base.input_interpolate_loc[i]) {
-         case TGSI_INTERPOLATE_LOC_CENTER:
-            barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
-            break;
-         case TGSI_INTERPOLATE_LOC_CENTROID:
-            barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
-            break;
-         case TGSI_INTERPOLATE_LOC_SAMPLE:
-            barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
-            break;
-         }
-      }
-#endif
-      psState.barycentricsMask = barycentricsMask;
-      psState.usesUAV = false; // XXX
-      psState.forceEarlyZ = false;
-      ctx->api.pfnSwrSetPixelShaderState(ctx->swrContext, &psState);
-
-      /* JIT sampler state */
-      if (ctx->dirty & (SWR_NEW_SAMPLER |
-                        SWR_NEW_FS)) {
-         swr_update_sampler_state(ctx,
-                                  PIPE_SHADER_FRAGMENT,
-                                  key.nr_samplers,
-                                  ctx->swrDC.samplersFS);
-      }
-
-      /* JIT sampler view state */
-      if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW |
-                        SWR_NEW_FRAMEBUFFER |
-                        SWR_NEW_FS)) {
-         swr_update_texture_state(ctx,
-                                  PIPE_SHADER_FRAGMENT,
-                                  key.nr_sampler_views,
-                                  ctx->swrDC.texturesFS);
-      }
-   }
-
-
-   /* VertexShader Constants */
-   if (ctx->dirty & SWR_NEW_VSCONSTANTS) {
-      swr_update_constants(ctx, PIPE_SHADER_VERTEX);
-   }
-
-   /* FragmentShader Constants */
-   if (ctx->dirty & SWR_NEW_FSCONSTANTS) {
-      swr_update_constants(ctx, PIPE_SHADER_FRAGMENT);
-   }
-
-   /* GeometryShader Constants */
-   if (ctx->dirty & SWR_NEW_GSCONSTANTS) {
-      swr_update_constants(ctx, PIPE_SHADER_GEOMETRY);
-   }
-
-   /* Tessellation Control Shader Constants */
-   if (ctx->dirty & SWR_NEW_TCSCONSTANTS) {
-      swr_update_constants(ctx, PIPE_SHADER_TESS_CTRL);
-   }
-
-   /* Tessellation Evaluation Shader Constants */
-   if (ctx->dirty & SWR_NEW_TESCONSTANTS) {
-      swr_update_constants(ctx, PIPE_SHADER_TESS_EVAL);
-   }
-
-   /* Depth/stencil state */
-   if (ctx->dirty & (SWR_NEW_DEPTH_STENCIL_ALPHA | SWR_NEW_FRAMEBUFFER)) {
-      struct pipe_depth_stencil_alpha_state *depth = ctx->depth_stencil;
-      struct pipe_stencil_state *stencil = depth->stencil;
-      SWR_DEPTH_STENCIL_STATE depthStencilState = {{0}};
-      SWR_DEPTH_BOUNDS_STATE depthBoundsState = {0};
-
-      /* XXX, incomplete.  Need to flesh out stencil & alpha test state
-      struct pipe_stencil_state *front_stencil =
-      ctx->depth_stencil.stencil[0];
-      struct pipe_stencil_state *back_stencil = ctx->depth_stencil.stencil[1];
-      */
-      /* stencil[0] feeds the front-face fields */
-      if (stencil[0].enabled) {
-         depthStencilState.stencilWriteEnable = 1;
-         depthStencilState.stencilTestEnable = 1;
-         depthStencilState.stencilTestFunc =
-            swr_convert_depth_func(stencil[0].func);
-
-         depthStencilState.stencilPassDepthPassOp =
-            swr_convert_stencil_op(stencil[0].zpass_op);
-         depthStencilState.stencilPassDepthFailOp =
-            swr_convert_stencil_op(stencil[0].zfail_op);
-         depthStencilState.stencilFailOp =
-            swr_convert_stencil_op(stencil[0].fail_op);
-         depthStencilState.stencilWriteMask = stencil[0].writemask;
-         depthStencilState.stencilTestMask = stencil[0].valuemask;
-         depthStencilState.stencilRefValue = ctx->stencil_ref.ref_value[0];
-      }
-      /* stencil[1] feeds the backface fields and enables two-sided
-       * stencil */
-      if (stencil[1].enabled) {
-         depthStencilState.doubleSidedStencilTestEnable = 1;
-
-         depthStencilState.backfaceStencilTestFunc =
-            swr_convert_depth_func(stencil[1].func);
-
-         depthStencilState.backfaceStencilPassDepthPassOp =
-            swr_convert_stencil_op(stencil[1].zpass_op);
-         depthStencilState.backfaceStencilPassDepthFailOp =
-            swr_convert_stencil_op(stencil[1].zfail_op);
-         depthStencilState.backfaceStencilFailOp =
-            swr_convert_stencil_op(stencil[1].fail_op);
-         depthStencilState.backfaceStencilWriteMask = stencil[1].writemask;
-         depthStencilState.backfaceStencilTestMask = stencil[1].valuemask;
-
-         depthStencilState.backfaceStencilRefValue =
-            ctx->stencil_ref.ref_value[1];
-      }
-
-      depthStencilState.depthTestEnable = depth->depth_enabled;
-      depthStencilState.depthTestFunc = swr_convert_depth_func(depth->depth_func);
-      depthStencilState.depthWriteEnable = depth->depth_writemask;
-      ctx->api.pfnSwrSetDepthStencilState(ctx->swrContext, &depthStencilState);
-
-      depthBoundsState.depthBoundsTestEnable = depth->depth_bounds_test;
-      depthBoundsState.depthBoundsTestMinValue = depth->depth_bounds_min;
-      depthBoundsState.depthBoundsTestMaxValue = depth->depth_bounds_max;
-      ctx->api.pfnSwrSetDepthBoundsState(ctx->swrContext, &depthBoundsState);
-   }
-
-   /* Blend State */
-   if (ctx->dirty & (SWR_NEW_BLEND |
-                     SWR_NEW_RASTERIZER |
-                     SWR_NEW_FRAMEBUFFER |
-                     SWR_NEW_DEPTH_STENCIL_ALPHA)) {
-      struct pipe_framebuffer_state *fb = &ctx->framebuffer;
-
-      SWR_BLEND_STATE blendState;
-      memcpy(&blendState, &ctx->blend->blendState, sizeof(blendState));
-      blendState.constantColor[0] = ctx->blend_color.color[0];
-      blendState.constantColor[1] = ctx->blend_color.color[1];
-      blendState.constantColor[2] = ctx->blend_color.color[2];
-      blendState.constantColor[3] = ctx->blend_color.color[3];
-
-      /* The alpha reference is handed over as its raw 32-bit pattern.
-       * Copy the bytes with memcpy instead of dereferencing a casted
-       * pointer, which would violate the strict-aliasing rule. */
-      uint32_t alpha_ref_bits;
-      memcpy(&alpha_ref_bits, &ctx->depth_stencil->alpha_ref_value,
-             sizeof(alpha_ref_bits));
-      blendState.alphaTestReference = alpha_ref_bits;
-
-      blendState.sampleMask = ctx->sample_mask;
-      blendState.sampleCount = GetSampleCount(fb->samples);
-
-      /* If there are no color buffers bound, disable writes on RT0
-       * and skip loop */
-      if (fb->nr_cbufs == 0) {
-         blendState.renderTarget[0].writeDisableRed = 1;
-         blendState.renderTarget[0].writeDisableGreen = 1;
-         blendState.renderTarget[0].writeDisableBlue = 1;
-         blendState.renderTarget[0].writeDisableAlpha = 1;
-         ctx->api.pfnSwrSetBlendFunc(ctx->swrContext, 0, NULL);
-      }
-      else
-         for (int target = 0;
-              target < std::min(SWR_NUM_RENDERTARGETS,
-                                PIPE_MAX_COLOR_BUFS);
-              target++) {
-            if (!fb->cbufs[target])
-               continue;
-
-            struct swr_resource *colorBuffer =
-               swr_resource(fb->cbufs[target]->texture);
-
-            BLEND_COMPILE_STATE compileState;
-            memset(&compileState, 0, sizeof(compileState));
-            compileState.format = colorBuffer->swr.format;
-            memcpy(&compileState.blendState,
-                   &ctx->blend->compileState[target],
-                   sizeof(compileState.blendState));
-
-            /* logic ops are dropped for float and sRGB targets, blending
-             * for pure integer targets */
-            const SWR_FORMAT_INFO& info = GetFormatInfo(compileState.format);
-            if (compileState.blendState.logicOpEnable &&
-                ((info.type[0] == SWR_TYPE_FLOAT) || info.isSRGB)) {
-               compileState.blendState.logicOpEnable = false;
-            }
-
-            if (info.type[0] == SWR_TYPE_SINT || info.type[0] == SWR_TYPE_UINT)
-               compileState.blendState.blendEnable = false;
-
-            /* nothing left to do for this target: no blend function */
-            if (compileState.blendState.blendEnable == false &&
-                compileState.blendState.logicOpEnable == false &&
-                ctx->depth_stencil->alpha_enabled == 0) {
-               ctx->api.pfnSwrSetBlendFunc(ctx->swrContext, target, NULL);
-               continue;
-            }
-
-            compileState.desc.alphaTestEnable =
-               ctx->depth_stencil->alpha_enabled;
-            compileState.desc.independentAlphaBlendEnable =
-               (compileState.blendState.sourceBlendFactor !=
-                compileState.blendState.sourceAlphaBlendFactor) ||
-               (compileState.blendState.destBlendFactor !=
-                compileState.blendState.destAlphaBlendFactor) ||
-               (compileState.blendState.colorBlendFunc !=
-                compileState.blendState.alphaBlendFunc);
-            compileState.desc.alphaToCoverageEnable =
-               ctx->blend->pipe.alpha_to_coverage;
-            compileState.desc.sampleMaskEnable = (blendState.sampleMask != 0);
-            compileState.desc.numSamples = fb->samples;
-
-            compileState.alphaTestFunction =
-               swr_convert_depth_func(ctx->depth_stencil->alpha_func);
-            compileState.alphaTestFormat = ALPHA_TEST_FLOAT32; // xxx
-
-            compileState.Canonicalize();
-
-            /* compiled blend functions are cached per compile state */
-            PFN_BLEND_JIT_FUNC func = NULL;
-            auto search = ctx->blendJIT->find(compileState);
-            if (search != ctx->blendJIT->end()) {
-               func = search->second;
-            } else {
-               HANDLE hJitMgr = screen->hJitMgr;
-               func = JitCompileBlend(hJitMgr, compileState);
-               debug_printf("BLEND shader %p\n", func);
-               assert(func && "Error: BlendShader = NULL");
-
-               ctx->blendJIT->insert(std::make_pair(compileState, func));
-            }
-            ctx->api.pfnSwrSetBlendFunc(ctx->swrContext, target, func);
-         }
-
-      ctx->api.pfnSwrSetBlendState(ctx->swrContext, &blendState);
-   }
-
-   if (ctx->dirty & SWR_NEW_STIPPLE) {
-      swr_update_poly_stipple(ctx);
-   }
-
-   /* Stream output */
-   if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_TCS | SWR_NEW_TES | SWR_NEW_SO | SWR_NEW_RASTERIZER)) {
-      ctx->vs->soState.rasterizerDisable =
-         ctx->rasterizer->rasterizer_discard;
-      ctx->api.pfnSwrSetSoState(ctx->swrContext, &ctx->vs->soState);
-
-      pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output;
-
-      /* unbound slots are programmed with a zeroed (disabled) buffer */
-      for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
-         SWR_STREAMOUT_BUFFER buffer = {0};
-         if (ctx->so_targets[i]) {
-            buffer.enable = true;
-            buffer.pBuffer =
-               (gfxptr_t)(swr_resource_data(ctx->so_targets[i]->buffer) +
-                          ctx->so_targets[i]->buffer_offset);
-            buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
-            buffer.pitch = stream_output->stride[i];
-            buffer.streamOffset = 0;
-         }
-
-         ctx->api.pfnSwrSetSoBuffers(ctx->swrContext, &buffer, i);
-      }
-   }
-
-
-   if (ctx->dirty & (SWR_NEW_CLIP | SWR_NEW_RASTERIZER | SWR_NEW_VS)) {
-      // shader exporting clip distances overrides all user clip planes
-      if (ctx->rasterizer->clip_plane_enable &&
-          !swr_get_last_fe(ctx)->num_written_clipdistance)
-      {
-         swr_draw_context *pDC = &ctx->swrDC;
-         memcpy(pDC->userClipPlanes,
-                ctx->clip.ucp,
-                sizeof(pDC->userClipPlanes));
-      }
-   }
-
-   // set up backend state
-   SWR_BACKEND_STATE backendState = {0};
-   if (ctx->gs) {
-      backendState.numAttributes = ctx->gs->info.base.num_outputs - 1;
-   } else
-   if (ctx->tes) {
-      backendState.numAttributes = ctx->tes->info.base.num_outputs - 1;
-      // no case for TCS, because if TCS is active, TES must be active
-      // as well - pipeline stages after tessellation does not support patches
-   } else {
-      backendState.numAttributes = ctx->vs->info.base.num_outputs - 1;
-      if (ctx->fs->info.base.uses_primid) {
-         /* append one attribute sourced from the primitive id constant */
-         backendState.numAttributes++;
-         backendState.swizzleEnable = true;
-         for (unsigned i = 0; i < sizeof(backendState.numComponents); i++) {
-            backendState.swizzleMap[i].sourceAttrib = i;
-         }
-         backendState.swizzleMap[ctx->vs->info.base.num_outputs - 1].constantSource =
-            SWR_CONSTANT_SOURCE_PRIM_ID;
-         backendState.swizzleMap[ctx->vs->info.base.num_outputs - 1].componentOverrideMask = 1;
-      }
-   }
-   if (ctx->rasterizer->sprite_coord_enable)
-      backendState.numAttributes++;
-
-   /* never exceed the capacity of numComponents[] */
-   backendState.numAttributes = std::min((size_t)backendState.numAttributes,
-                                         sizeof(backendState.numComponents));
-   for (unsigned i = 0; i < backendState.numAttributes; i++)
-      backendState.numComponents[i] = 4;
-   backendState.constantInterpolationMask = ctx->fs->constantMask |
-      (ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : 0);
-   backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask;
-
-   struct tgsi_shader_info *pLastFE = swr_get_last_fe(ctx);
-
-   backendState.readRenderTargetArrayIndex = pLastFE->writes_layer;
-   backendState.readViewportArrayIndex = pLastFE->writes_viewport_index;
-   backendState.vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
-
-   /* shader-written clip distances are masked by the enabled planes;
-    * otherwise the enabled user clip planes are used directly */
-   backendState.clipDistanceMask =
-      pLastFE->num_written_clipdistance ?
-      pLastFE->clipdist_writemask & ctx->rasterizer->clip_plane_enable :
-      ctx->rasterizer->clip_plane_enable;
-
-   backendState.cullDistanceMask =
-      pLastFE->culldist_writemask << pLastFE->num_written_clipdistance;
-
-   // Assume old layout of SGV, POSITION, CLIPCULL, ATTRIB
-   backendState.vertexClipCullOffset = backendState.vertexAttribOffset - 2;
-
-   ctx->api.pfnSwrSetBackendState(ctx->swrContext, &backendState);
-
-   /* Ensure that any in-progress attachment change StoreTiles finish */
-   if (swr_is_fence_pending(screen->flush_fence))
-      swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
-
-   /* Finally, update the in-use status of all resources involved in draw */
-   swr_update_resource_status(pipe, p_draw_info);
-
-   ctx->dirty = post_update_dirty_flags;
-}
-
-
-/*
- * Allocate a stream output target describing the range
- * [buffer_offset, buffer_offset + buffer_size) of \p buffer.  The target
- * takes a reference on the buffer and starts with a reference count of 1.
- * Returns NULL if the allocation fails.
- */
-static struct pipe_stream_output_target *
-swr_create_so_target(struct pipe_context *pipe,
-                     struct pipe_resource *buffer,
-                     unsigned buffer_offset,
-                     unsigned buffer_size)
-{
-   struct pipe_stream_output_target *so =
-      CALLOC_STRUCT(pipe_stream_output_target);
-
-   if (so) {
-      so->context = pipe;
-      so->reference.count = 1;
-      pipe_resource_reference(&so->buffer, buffer);
-      so->buffer_offset = buffer_offset;
-      so->buffer_size = buffer_size;
-   }
-
-   return so;
-}
-
-/*
- * Destroy a stream-output target: reset its buffer pointer to NULL through
- * pipe_resource_reference() and then free the target structure itself.
- * The pipe argument is not used.
- */
-static void
-swr_destroy_so_target(struct pipe_context *pipe,
- struct pipe_stream_output_target *target)
-{
- pipe_resource_reference(&target->buffer, NULL);
- FREE(target);
-}
-
-/*
- * Bind the first num_targets entries of targets[] as the context's
- * stream-output targets and clear every previously bound slot beyond that.
- *
- * Afterwards the draw context's soPrims pointer is aimed at the context's
- * primitive counter and SWR_NEW_SO is added to the dirty flags.
- * The offsets argument is not consulted.
- */
-static void
-swr_set_so_targets(struct pipe_context *pipe,
-                   unsigned num_targets,
-                   struct pipe_stream_output_target **targets,
-                   const unsigned *offsets)
-{
-   struct swr_context *ctx = swr_context(pipe);
-   uint32_t slot = 0;
-
-   assert(num_targets <= MAX_SO_STREAMS);
-
-   /* Install the new targets. */
-   while (slot < num_targets) {
-      pipe_so_target_reference(
-         (struct pipe_stream_output_target **)&ctx->so_targets[slot],
-         targets[slot]);
-      slot++;
-   }
-
-   /* Continue from num_targets: drop slots left over from the old binding. */
-   while (slot < ctx->num_so_targets) {
-      pipe_so_target_reference(
-         (struct pipe_stream_output_target **)&ctx->so_targets[slot], NULL);
-      slot++;
-   }
-
-   ctx->num_so_targets = num_targets;
-   ctx->swrDC.soPrims = &ctx->so_primCounter;
-   ctx->dirty |= SWR_NEW_SO;
-}
-
-/* Store the patch vertex count in the SWR context. */
-static void
-swr_set_patch_vertices(struct pipe_context *pipe, uint8_t patch_vertices)
-{
-   swr_context(pipe)->patch_vertices = patch_vertices;
-}
-
-
-/*
- * Fill in the state-related entry points of a pipe_context with the SWR
- * implementations defined in this file.  Each assignment is independent of
- * the others.
- */
-void
-swr_state_init(struct pipe_context *pipe)
-{
-   /* Constant state objects: blend, depth/stencil/alpha, rasterizer. */
-   pipe->create_blend_state = swr_create_blend_state;
-   pipe->bind_blend_state = swr_bind_blend_state;
-   pipe->delete_blend_state = swr_delete_blend_state;
-   pipe->create_depth_stencil_alpha_state = swr_create_depth_stencil_state;
-   pipe->bind_depth_stencil_alpha_state = swr_bind_depth_stencil_state;
-   pipe->delete_depth_stencil_alpha_state = swr_delete_depth_stencil_state;
-   pipe->create_rasterizer_state = swr_create_rasterizer_state;
-   pipe->bind_rasterizer_state = swr_bind_rasterizer_state;
-   pipe->delete_rasterizer_state = swr_delete_rasterizer_state;
-
-   /* Samplers and sampler views. */
-   pipe->create_sampler_state = swr_create_sampler_state;
-   pipe->bind_sampler_states = swr_bind_sampler_states;
-   pipe->delete_sampler_state = swr_delete_sampler_state;
-   pipe->create_sampler_view = swr_create_sampler_view;
-   pipe->set_sampler_views = swr_set_sampler_views;
-   pipe->sampler_view_destroy = swr_sampler_view_destroy;
-
-   /* Shader stages: vertex, tessellation, geometry, fragment. */
-   pipe->create_vs_state = swr_create_vs_state;
-   pipe->bind_vs_state = swr_bind_vs_state;
-   pipe->delete_vs_state = swr_delete_vs_state;
-   pipe->create_tcs_state = swr_create_tcs_state;
-   pipe->bind_tcs_state = swr_bind_tcs_state;
-   pipe->delete_tcs_state = swr_delete_tcs_state;
-   pipe->create_tes_state = swr_create_tes_state;
-   pipe->bind_tes_state = swr_bind_tes_state;
-   pipe->delete_tes_state = swr_delete_tes_state;
-   pipe->create_gs_state = swr_create_gs_state;
-   pipe->bind_gs_state = swr_bind_gs_state;
-   pipe->delete_gs_state = swr_delete_gs_state;
-   pipe->create_fs_state = swr_create_fs_state;
-   pipe->bind_fs_state = swr_bind_fs_state;
-   pipe->delete_fs_state = swr_delete_fs_state;
-   pipe->set_patch_vertices = swr_set_patch_vertices;
-
-   /* Shader inputs: constants, vertex layout, vertex buffers. */
-   pipe->set_constant_buffer = swr_set_constant_buffer;
-   pipe->create_vertex_elements_state = swr_create_vertex_elements_state;
-   pipe->bind_vertex_elements_state = swr_bind_vertex_elements_state;
-   pipe->delete_vertex_elements_state = swr_delete_vertex_elements_state;
-   pipe->set_vertex_buffers = swr_set_vertex_buffers;
-
-   /* Parameter-style state setters. */
-   pipe->set_polygon_stipple = swr_set_polygon_stipple;
-   pipe->set_clip_state = swr_set_clip_state;
-   pipe->set_scissor_states = swr_set_scissor_states;
-   pipe->set_viewport_states = swr_set_viewport_states;
-   pipe->set_framebuffer_state = swr_set_framebuffer_state;
-   pipe->set_blend_color = swr_set_blend_color;
-   pipe->set_stencil_ref = swr_set_stencil_ref;
-   pipe->set_sample_mask = swr_set_sample_mask;
-   pipe->get_sample_position = swr_get_sample_position;
-
-   /* Stream output. */
-   pipe->create_stream_output_target = swr_create_so_target;
-   pipe->stream_output_target_destroy = swr_destroy_so_target;
-   pipe->set_stream_output_targets = swr_set_so_targets;
-}
diff --git a/src/gallium/drivers/swr/swr_state.h b/src/gallium/drivers/swr/swr_state.h
deleted file mode 100644
index 75a70de0b1a..00000000000
--- a/src/gallium/drivers/swr/swr_state.h
+++ /dev/null
@@ -1,426 +0,0 @@
-/****************************************************************************
- * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- ***************************************************************************/
-
-#ifndef SWR_STATE_H
-#define SWR_STATE_H
-
-#include "pipe/p_defines.h"
-#include "tgsi/tgsi_scan.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_dump.h"
-#include "gallivm/lp_bld_init.h"
-#include "gallivm/lp_bld_tgsi.h"
-#include "util/crc32.h"
-#include "api.h"
-#include "swr_tex_sample.h"
-#include "swr_shader.h"
-#include <unordered_map>
-#include <memory>
-
-/*
- * Couples a shader entry point of type T with the gallivm_state passed in
- * alongside it.  The destructor calls gallivm_destroy() on that state, so a
- * ShaderVariant is the sole owner of it.
- *
- * Copying is disabled: a copy would hold the same gallivm pointer and
- * gallivm_destroy() would then run twice on it.  The maps below hold
- * variants through std::unique_ptr, which needs neither copy nor move.
- */
-template <typename T>
-struct ShaderVariant {
-   struct gallivm_state *gallivm;
-   T shader;
-
-   ShaderVariant(struct gallivm_state *gs, T code) : gallivm(gs), shader(code) {}
-   ~ShaderVariant() { gallivm_destroy(gallivm); }
-
-   ShaderVariant(const ShaderVariant &) = delete;
-   ShaderVariant &operator=(const ShaderVariant &) = delete;
-};
-
-/* TCS/TES spellings for the HS/DS function-pointer types. */
-using PFN_TCS_FUNC = PFN_HS_FUNC;
-using PFN_TES_FUNC = PFN_DS_FUNC;
-
-/* One ShaderVariant instantiation per shader entry-point type. */
-typedef ShaderVariant<PFN_VERTEX_FUNC> VariantVS;
-typedef ShaderVariant<PFN_PIXEL_KERNEL> VariantFS;
-typedef ShaderVariant<PFN_GS_FUNC> VariantGS;
-typedef ShaderVariant<PFN_TCS_FUNC> VariantTCS;
-typedef ShaderVariant<PFN_TES_FUNC> VariantTES;
-
-/*
- * Vertex shader object: the gallium shader state, its TGSI info, a map
- * from swr_jit_vs_key to the owned VariantVS for that key, and stream-output
- * state with one PFN_SO_FUNC slot per PIPE_PRIM_* value (zero-initialized).
- */
-struct swr_vertex_shader {
- struct pipe_shader_state pipe;
- struct lp_tgsi_info info;
- std::unordered_map<swr_jit_vs_key, std::unique_ptr<VariantVS>> map;
- SWR_STREAMOUT_STATE soState;
- PFN_SO_FUNC soFunc[PIPE_PRIM_MAX] {0};
-};
-
-/*
- * Fragment shader object.  The three masks are copied into the backend
- * state when derived state is updated (constant interpolation, flat-shaded
- * constant interpolation, point-sprite texcoord replacement).  map holds the
- * owned VariantFS for each swr_jit_fs_key.
- */
-struct swr_fragment_shader {
- struct pipe_shader_state pipe;
- struct lp_tgsi_info info;
- uint32_t constantMask;
- uint32_t flatConstantMask;
- uint32_t pointSpriteMask;
- std::unordered_map<swr_jit_fs_key, std::unique_ptr<VariantFS>> map;
-};
-
-/*
- * Geometry shader object: gallium shader state, TGSI info, the SWR
- * geometry-shader state and a map from swr_jit_gs_key to the owned VariantGS.
- */
-struct swr_geometry_shader {
- struct pipe_shader_state pipe;
- struct lp_tgsi_info info;
- SWR_GS_STATE gsState;
-
- std::unordered_map<swr_jit_gs_key, std::unique_ptr<VariantGS>> map;
-};
-
-/*
- * Tessellation control shader object: gallium shader state, TGSI info, a
- * vertices-per-patch count and a map from swr_jit_tcs_key to the owned
- * VariantTCS.
- */
-struct swr_tess_control_shader {
- struct pipe_shader_state pipe;
- struct lp_tgsi_info info;
- uint32_t vertices_per_patch;
-
- std::unordered_map<swr_jit_tcs_key, std::unique_ptr<VariantTCS>> map;
-};
-
-/*
- * Tessellation evaluation shader object: gallium shader state, TGSI info,
- * the SWR tessellation state and a map from swr_jit_tes_key to the owned
- * VariantTES.
- */
-struct swr_tess_evaluation_shader {
- struct pipe_shader_state pipe;
- struct lp_tgsi_info info;
- SWR_TS_STATE ts_state;
-
- std::unordered_map<swr_jit_tes_key, std::unique_ptr<VariantTES>> map;
-};
-
-
-/*
- * Vertex element state.
- *
- * Holds the fetch compile state, the currently selected fetch function
- * (NULL by default) and a map from swr_jit_fetch_key to fetch function.
- * The per-attribute arrays and instanced_bufs start out zeroed.
- * NOTE(review): instanced_bufs looks like a per-buffer bitmask - confirm
- * against the code that fills it in.
- */
-struct swr_vertex_element_state {
- FETCH_COMPILE_STATE fsState;
- PFN_FETCH_FUNC fsFunc {NULL};
- uint32_t stream_pitch[PIPE_MAX_ATTRIBS] {0};
- uint32_t min_instance_div[PIPE_MAX_ATTRIBS] {0};
- uint32_t instanced_bufs {0};
- std::unordered_map<swr_jit_fetch_key, PFN_FETCH_FUNC> map;
-};
-
-/*
- * Blend state object: the gallium blend state, the SWR blend state and one
- * blend compile state per color buffer (PIPE_MAX_COLOR_BUFS entries).
- */
-struct swr_blend_state {
- struct pipe_blend_state pipe;
- SWR_BLEND_STATE blendState;
- RENDER_TARGET_BLEND_COMPILE_STATE compileState[PIPE_MAX_COLOR_BUFS];
-};
-
-/*
- * Polygon stipple pattern plus a flag.
- * NOTE(review): prim_is_poly presumably records whether the current
- * primitive is a polygon type - confirm at the place where it is set.
- */
-struct swr_poly_stipple {
- struct pipe_poly_stipple pipe;
- bool prim_is_poly;
-};
-
-/*
- * Derived SWR API DrawState
- * For convenience of making simple changes without re-deriving state.
- *
- * Holds the rasterizer state, one viewport per viewport/scissor slot
- * (KNOB_NUM_VIEWPORTS_SCISSORS entries) and the viewport matrices.
- */
-struct swr_derived_state {
- SWR_RASTSTATE rastState;
- SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS];
- SWR_VIEWPORT_MATRICES vpm;
-};
-
-/*
- * Declaration only.  Both the draw-info pointer and the
- * start/count/bias pointer default to nullptr, so the function can be
- * called with just the context.
- */
-void swr_update_derived(struct pipe_context *,
- const struct pipe_draw_info * = nullptr,
- const struct pipe_draw_start_count_bias *draw = nullptr);
-
-/*
- * Conversion functions: Convert mesa state defines to SWR.
- */
-
-/*
- * Translate a PIPE_LOGICOP_* value into the same-named SWR LOGICOP_* value.
- * Unknown values assert and fall back to LOGICOP_NOOP.
- */
-static INLINE SWR_LOGIC_OP
-swr_convert_logic_op(const UINT op)
-{
-   switch (op) {
-   case PIPE_LOGICOP_CLEAR:         return LOGICOP_CLEAR;
-   case PIPE_LOGICOP_NOR:           return LOGICOP_NOR;
-   case PIPE_LOGICOP_AND_INVERTED:  return LOGICOP_AND_INVERTED;
-   case PIPE_LOGICOP_COPY_INVERTED: return LOGICOP_COPY_INVERTED;
-   case PIPE_LOGICOP_AND_REVERSE:   return LOGICOP_AND_REVERSE;
-   case PIPE_LOGICOP_INVERT:        return LOGICOP_INVERT;
-   case PIPE_LOGICOP_XOR:           return LOGICOP_XOR;
-   case PIPE_LOGICOP_NAND:          return LOGICOP_NAND;
-   case PIPE_LOGICOP_AND:           return LOGICOP_AND;
-   case PIPE_LOGICOP_EQUIV:         return LOGICOP_EQUIV;
-   case PIPE_LOGICOP_NOOP:          return LOGICOP_NOOP;
-   case PIPE_LOGICOP_OR_INVERTED:   return LOGICOP_OR_INVERTED;
-   case PIPE_LOGICOP_COPY:          return LOGICOP_COPY;
-   case PIPE_LOGICOP_OR_REVERSE:    return LOGICOP_OR_REVERSE;
-   case PIPE_LOGICOP_OR:            return LOGICOP_OR;
-   case PIPE_LOGICOP_SET:           return LOGICOP_SET;
-   default:
-      break;
-   }
-
-   assert(0 && "Unsupported logic op");
-   return LOGICOP_NOOP;
-}
-
-/*
- * Translate a PIPE_STENCIL_OP_* value into an SWR STENCILOP_* value.
- *
- * Note the naming difference: gallium's plain INCR/DECR map to the SWR
- * *SAT ops, while gallium's INCR_WRAP/DECR_WRAP map to SWR's plain
- * INCR/DECR.  Unknown values assert and fall back to STENCILOP_KEEP.
- */
-static INLINE SWR_STENCILOP
-swr_convert_stencil_op(const UINT op)
-{
-   switch (op) {
-   case PIPE_STENCIL_OP_KEEP:      return STENCILOP_KEEP;
-   case PIPE_STENCIL_OP_ZERO:      return STENCILOP_ZERO;
-   case PIPE_STENCIL_OP_REPLACE:   return STENCILOP_REPLACE;
-   case PIPE_STENCIL_OP_INCR:      return STENCILOP_INCRSAT;
-   case PIPE_STENCIL_OP_DECR:      return STENCILOP_DECRSAT;
-   case PIPE_STENCIL_OP_INCR_WRAP: return STENCILOP_INCR;
-   case PIPE_STENCIL_OP_DECR_WRAP: return STENCILOP_DECR;
-   case PIPE_STENCIL_OP_INVERT:    return STENCILOP_INVERT;
-   default:
-      break;
-   }
-
-   assert(0 && "Unsupported stencil op");
-   return STENCILOP_KEEP;
-}
-
-/*
- * Pick the SWR index format for an index size given in bytes: the sizes of
- * unsigned char, unsigned short and unsigned int select R8_UINT, R16_UINT
- * and R32_UINT.  Any other size asserts and falls back to R32_UINT.
- */
-static INLINE SWR_FORMAT
-swr_convert_index_type(const UINT index_size)
-{
-   switch (index_size) {
-   case sizeof(unsigned char):  return R8_UINT;
-   case sizeof(unsigned short): return R16_UINT;
-   case sizeof(unsigned int):   return R32_UINT;
-   default:
-      break;
-   }
-
-   assert(0 && "Unsupported index type");
-   return R32_UINT;
-}
-
-
-/*
- * Translate a PIPE_FUNC_* comparison into the SWR ZFUNC_* equivalent.
- * Unknown values assert and fall back to ZFUNC_ALWAYS.
- */
-static INLINE SWR_ZFUNCTION
-swr_convert_depth_func(const UINT pipe_func)
-{
-   switch (pipe_func) {
-   case PIPE_FUNC_NEVER:    return ZFUNC_NEVER;
-   case PIPE_FUNC_LESS:     return ZFUNC_LT;
-   case PIPE_FUNC_EQUAL:    return ZFUNC_EQ;
-   case PIPE_FUNC_LEQUAL:   return ZFUNC_LE;
-   case PIPE_FUNC_GREATER:  return ZFUNC_GT;
-   case PIPE_FUNC_NOTEQUAL: return ZFUNC_NE;
-   case PIPE_FUNC_GEQUAL:   return ZFUNC_GE;
-   case PIPE_FUNC_ALWAYS:   return ZFUNC_ALWAYS;
-   default:
-      break;
-   }
-
-   assert(0 && "Unsupported depth func");
-   return ZFUNC_ALWAYS;
-}
-
-
-/*
- * Translate a PIPE_FACE_* cull selection into an SWR_CULLMODE_* value.
- * Unknown values assert and fall back to SWR_CULLMODE_NONE.
- */
-static INLINE SWR_CULLMODE
-swr_convert_cull_mode(const UINT cull_face)
-{
-   switch (cull_face) {
-   case PIPE_FACE_NONE:           return SWR_CULLMODE_NONE;
-   case PIPE_FACE_FRONT:          return SWR_CULLMODE_FRONT;
-   case PIPE_FACE_BACK:           return SWR_CULLMODE_BACK;
-   case PIPE_FACE_FRONT_AND_BACK: return SWR_CULLMODE_BOTH;
-   default:
-      break;
-   }
-
-   assert(0 && "Invalid cull mode");
-   return SWR_CULLMODE_NONE;
-}
-
-/*
- * Translate a PIPE_BLEND_* equation into an SWR BLENDOP_* value.
- * Unknown values assert and fall back to BLENDOP_ADD.
- */
-static INLINE SWR_BLEND_OP
-swr_convert_blend_func(const UINT blend_func)
-{
-   switch (blend_func) {
-   case PIPE_BLEND_ADD:              return BLENDOP_ADD;
-   case PIPE_BLEND_SUBTRACT:         return BLENDOP_SUBTRACT;
-   case PIPE_BLEND_REVERSE_SUBTRACT: return BLENDOP_REVSUBTRACT;
-   case PIPE_BLEND_MIN:              return BLENDOP_MIN;
-   case PIPE_BLEND_MAX:              return BLENDOP_MAX;
-   default:
-      break;
-   }
-
-   assert(0 && "Invalid blend func");
-   return BLENDOP_ADD;
-}
-
-/*
- * Translate a PIPE_BLENDFACTOR_* value into the same-named SWR
- * BLENDFACTOR_* value.  Unknown values assert and fall back to
- * BLENDFACTOR_ONE.
- */
-static INLINE SWR_BLEND_FACTOR
-swr_convert_blend_factor(const UINT blend_factor)
-{
-   switch (blend_factor) {
-   case PIPE_BLENDFACTOR_ONE:                return BLENDFACTOR_ONE;
-   case PIPE_BLENDFACTOR_SRC_COLOR:          return BLENDFACTOR_SRC_COLOR;
-   case PIPE_BLENDFACTOR_SRC_ALPHA:          return BLENDFACTOR_SRC_ALPHA;
-   case PIPE_BLENDFACTOR_DST_ALPHA:          return BLENDFACTOR_DST_ALPHA;
-   case PIPE_BLENDFACTOR_DST_COLOR:          return BLENDFACTOR_DST_COLOR;
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BLENDFACTOR_SRC_ALPHA_SATURATE;
-   case PIPE_BLENDFACTOR_CONST_COLOR:        return BLENDFACTOR_CONST_COLOR;
-   case PIPE_BLENDFACTOR_CONST_ALPHA:        return BLENDFACTOR_CONST_ALPHA;
-   case PIPE_BLENDFACTOR_SRC1_COLOR:         return BLENDFACTOR_SRC1_COLOR;
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:         return BLENDFACTOR_SRC1_ALPHA;
-   case PIPE_BLENDFACTOR_ZERO:               return BLENDFACTOR_ZERO;
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:      return BLENDFACTOR_INV_SRC_COLOR;
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:      return BLENDFACTOR_INV_SRC_ALPHA;
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:      return BLENDFACTOR_INV_DST_ALPHA;
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:      return BLENDFACTOR_INV_DST_COLOR;
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:    return BLENDFACTOR_INV_CONST_COLOR;
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:    return BLENDFACTOR_INV_CONST_ALPHA;
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:     return BLENDFACTOR_INV_SRC1_COLOR;
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:     return BLENDFACTOR_INV_SRC1_ALPHA;
-   default:
-      break;
-   }
-
-   assert(0 && "Invalid blend factor");
-   return BLENDFACTOR_ONE;
-}
-
-/*
- * Translate a gallium texture target into an SWR surface type.  Array
- * targets share the surface type of their non-array counterpart, and
- * PIPE_TEXTURE_RECT is treated as 2D.  Unknown targets assert and yield
- * SURFACE_NULL.
- */
-static INLINE enum SWR_SURFACE_TYPE
-swr_convert_target_type(const enum pipe_texture_target target)
-{
-   switch (target) {
-   case PIPE_BUFFER:
-      return SURFACE_BUFFER;
-
-   case PIPE_TEXTURE_1D:
-   case PIPE_TEXTURE_1D_ARRAY:
-      return SURFACE_1D;
-
-   case PIPE_TEXTURE_2D:
-   case PIPE_TEXTURE_2D_ARRAY:
-   case PIPE_TEXTURE_RECT:
-      return SURFACE_2D;
-
-   case PIPE_TEXTURE_3D:
-      return SURFACE_3D;
-
-   case PIPE_TEXTURE_CUBE:
-   case PIPE_TEXTURE_CUBE_ARRAY:
-      return SURFACE_CUBE;
-
-   default:
-      break;
-   }
-
-   assert(0);
-   return SURFACE_NULL;
-}
-
-/*
- * Convert mesa PIPE_PRIM_X to SWR enum PRIMITIVE_TOPOLOGY
- *
- * mode is the PIPE_PRIM_* value.  tcs_verts is only read for
- * PIPE_PRIM_PATCHES, where it is added to TOP_PATCHLIST_BASE; it is not
- * range-checked here, so the caller must pass a valid patch vertex count.
- * PIPE_PRIM_POLYGON is drawn as a triangle fan.  Unknown modes assert and
- * yield TOP_UNKNOWN.
- */
-static INLINE enum PRIMITIVE_TOPOLOGY
-swr_convert_prim_topology(const unsigned mode, const unsigned tcs_verts)
-{
-   switch (mode) {
-   case PIPE_PRIM_POINTS:
-      return TOP_POINT_LIST;
-   case PIPE_PRIM_LINES:
-      return TOP_LINE_LIST;
-   case PIPE_PRIM_LINE_LOOP:
-      return TOP_LINE_LOOP;
-   case PIPE_PRIM_LINE_STRIP:
-      return TOP_LINE_STRIP;
-   case PIPE_PRIM_TRIANGLES:
-      return TOP_TRIANGLE_LIST;
-   case PIPE_PRIM_TRIANGLE_STRIP:
-      return TOP_TRIANGLE_STRIP;
-   case PIPE_PRIM_TRIANGLE_FAN:
-      return TOP_TRIANGLE_FAN;
-   case PIPE_PRIM_QUADS:
-      return TOP_QUAD_LIST;
-   case PIPE_PRIM_QUAD_STRIP:
-      return TOP_QUAD_STRIP;
-   case PIPE_PRIM_POLYGON:
-      return TOP_TRIANGLE_FAN; /* XXX TOP_POLYGON; */
-   case PIPE_PRIM_LINES_ADJACENCY:
-      return TOP_LINE_LIST_ADJ;
-   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
-      return TOP_LISTSTRIP_ADJ;
-   case PIPE_PRIM_TRIANGLES_ADJACENCY:
-      return TOP_TRI_LIST_ADJ;
-   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
-      return TOP_TRI_STRIP_ADJ;
-   case PIPE_PRIM_PATCHES:
-      // rasterizer has a separate type for each possible number of patch vertices
-      return (PRIMITIVE_TOPOLOGY)((unsigned)TOP_PATCHLIST_BASE + tcs_verts);
-   default:
-      assert(0 && "Unknown topology");
-      return TOP_UNKNOWN;
-   }
-}
-
-/*
- * Convert mesa PIPE_POLYGON_MODE_X to SWR enum SWR_FILLMODE.
- * Unknown modes assert and fall back to solid fill.
- */
-static INLINE enum SWR_FILLMODE
-swr_convert_fill_mode(const unsigned mode)
-{
-   switch (mode) {
-   case PIPE_POLYGON_MODE_FILL:  return SWR_FILLMODE_SOLID;
-   case PIPE_POLYGON_MODE_LINE:  return SWR_FILLMODE_WIREFRAME;
-   case PIPE_POLYGON_MODE_POINT: return SWR_FILLMODE_POINT;
-   default:
-      break;
-   }
-
-   assert(0 && "Unknown fillmode");
-   return SWR_FILLMODE_SOLID; // at least do something sensible
-}
-
-
-#endif
diff --git a/src/gallium/drivers/swr/swr_tex_sample.cpp b/src/gallium/drivers/swr/swr_tex_sample.cpp
deleted file mode 100644
index 1cf00b29249..00000000000
--- a/src/gallium/drivers/swr/swr_tex_sample.cpp
+++ /dev/null
@@ -1,376 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 VMware, Inc.
- * All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Largely a copy of llvmpipe's lp_tex_sample.c
- */
-
-/**
- * Texture sampling code generation
- *
- * This file is nothing more than ugly glue between three largely independent
- * entities:
- * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa)
- * - texture sampling code generation (i.e., lp_build_sample_soa)
- * - SWR driver
- *
- * All interesting code is in the functions mentioned above. There is really
- * nothing to see here.
- *
- * @author Jose Fonseca <jfonseca@vmware.com>
- */
-
-#include "state.h"
-#include "JitManager.h"
-#include "gen_state_llvm.h"
-
-#include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
-#include "gallivm/lp_bld_debug.h"
-#include "gallivm/lp_bld_const.h"
-#include "gallivm/lp_bld_type.h"
-#include "gallivm/lp_bld_sample.h"
-#include "gallivm/lp_bld_tgsi.h"
-#include "util/u_memory.h"
-
-#include "swr_tex_sample.h"
-#include "gen_surf_state_llvm.h"
-#include "gen_swr_context_llvm.h"
-
-using namespace SwrJit;
-
-/**
- * Bridge between the sampler/texture state stored in the SWR draw context
- * and the gallivm sampler code generator.
- *
- * base carries the callbacks the generator uses to fetch texture and
- * sampler fields at run time; static_state points at the per-unit static
- * state array; shader_type selects which stage's texture/sampler arrays of
- * the draw context the callbacks index into.
- */
-struct swr_sampler_dynamic_state {
- struct lp_sampler_dynamic_state base;
-
- const struct swr_sampler_static_state *static_state;
-
- enum pipe_shader_type shader_type;
-};
-
-
-/**
- * This is the bridge between our sampler and the TGSI translator.
- *
- * base must stay the first member: the emit callbacks receive a pointer to
- * base and cast it back to swr_sampler_soa.
- */
-struct swr_sampler_soa {
- struct lp_build_sampler_soa base;
-
- struct swr_sampler_dynamic_state dynamic_state;
-};
-
-
-/**
- * Fetch the specified member of a per-unit texture entry in the SWR draw
- * context.
- *
- * \param base          dynamic state; cast to swr_sampler_dynamic_state to
- *                      read the shader stage
- * \param context_ptr   LLVM value pointing at the draw context
- * \param texture_unit  index into the selected stage's texture array
- * \param member_index  field index inside the texture entry
- * \param member_name   used only to name the resulting LLVM value
- * \param emit_load if TRUE, emit the LLVM load instruction to actually
- *                  fetch the field's value. Otherwise, just emit the
- *                  GEP code to address the field.
- * \return the loaded value or the field address; NULL for an unsupported
- *         shader stage when assertions are compiled out
- *
- * @sa http://llvm.org/docs/GetElementPtr.html
- */
-static LLVMValueRef
-swr_texture_member(const struct lp_sampler_dynamic_state *base,
-                   struct gallivm_state *gallivm,
-                   LLVMValueRef context_ptr,
-                   unsigned texture_unit,
-                   unsigned member_index,
-                   const char *member_name,
-                   boolean emit_load)
-{
-   LLVMBuilderRef builder = gallivm->builder;
-   LLVMValueRef indices[4];
-   LLVMValueRef ptr;
-   LLVMValueRef res;
-
-   assert(texture_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS);
-
-   /* context[0] */
-   indices[0] = lp_build_const_int32(gallivm, 0);
-   /* context[0].textures */
-   auto dynamic = (const struct swr_sampler_dynamic_state *)base;
-   switch (dynamic->shader_type) {
-   case PIPE_SHADER_FRAGMENT:
-      indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesFS);
-      break;
-   case PIPE_SHADER_VERTEX:
-      indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesVS);
-      break;
-   case PIPE_SHADER_GEOMETRY:
-      indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesGS);
-      break;
-   case PIPE_SHADER_TESS_CTRL:
-      indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesTCS);
-      break;
-   case PIPE_SHADER_TESS_EVAL:
-      indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesTES);
-      break;
-   default:
-      assert(0 && "unsupported shader type");
-      /* Never hand an uninitialized index to LLVMBuildGEP. */
-      return NULL;
-   }
-   /* context[0].textures[unit] */
-   indices[2] = lp_build_const_int32(gallivm, texture_unit);
-   /* context[0].textures[unit].member */
-   indices[3] = lp_build_const_int32(gallivm, member_index);
-
-   ptr = LLVMBuildGEP(builder, context_ptr, indices, ARRAY_SIZE(indices), "");
-
-   if (emit_load)
-      res = LLVMBuildLoad(builder, ptr, "");
-   else
-      res = ptr;
-
-   lp_build_name(res, "context.texture%u.%s", texture_unit, member_name);
-
-   return res;
-}
-
-
-/**
- * Helper macro to instantiate the functions that generate the code to
- * fetch the members of a texture entry to fulfill the sampler code
- * generator requests.
- *
- * Each instantiation defines swr_texture_<name>(), which forwards to
- * swr_texture_member() with the field index swr_jit_texture_<name> and
- * the given emit_load flag.  The texture_unit_offset parameter is part of
- * the callback signature but is not used.
- *
- * This complexity is the price we have to pay to keep the texture
- * sampler code generator a reusable module without dependencies to
- * swr internals.
- */
-#define SWR_TEXTURE_MEMBER(_name, _emit_load) \
- static LLVMValueRef swr_texture_##_name( \
- const struct lp_sampler_dynamic_state *base, \
- struct gallivm_state *gallivm, \
- LLVMValueRef context_ptr, \
- unsigned texture_unit, \
- LLVMValueRef texture_unit_offset) \
- { \
- return swr_texture_member(base, \
- gallivm, \
- context_ptr, \
- texture_unit, \
- swr_jit_texture_##_name, \
- #_name, \
- _emit_load); \
- }
-
-
-/* TRUE: the accessor returns the loaded value; FALSE: the field address. */
-SWR_TEXTURE_MEMBER(width, TRUE)
-SWR_TEXTURE_MEMBER(height, TRUE)
-SWR_TEXTURE_MEMBER(depth, TRUE)
-SWR_TEXTURE_MEMBER(first_level, TRUE)
-SWR_TEXTURE_MEMBER(last_level, TRUE)
-SWR_TEXTURE_MEMBER(base_ptr, TRUE)
-SWR_TEXTURE_MEMBER(num_samples, TRUE)
-SWR_TEXTURE_MEMBER(sample_stride, TRUE)
-SWR_TEXTURE_MEMBER(row_stride, FALSE)
-SWR_TEXTURE_MEMBER(img_stride, FALSE)
-SWR_TEXTURE_MEMBER(mip_offsets, FALSE)
-
-
-/**
- * Fetch the specified member of a per-unit sampler entry in the SWR draw
- * context.
- *
- * \param base          dynamic state; cast to swr_sampler_dynamic_state to
- *                      read the shader stage
- * \param context_ptr   LLVM value pointing at the draw context
- * \param sampler_unit  index into the selected stage's sampler array
- * \param member_index  field index inside the sampler entry
- * \param member_name   used only to name the resulting LLVM value
- * \param emit_load if TRUE, emit the LLVM load instruction to actually
- *                  fetch the field's value. Otherwise, just emit the
- *                  GEP code to address the field.
- * \return the loaded value or the field address; NULL for an unsupported
- *         shader stage when assertions are compiled out
- *
- * @sa http://llvm.org/docs/GetElementPtr.html
- */
-static LLVMValueRef
-swr_sampler_member(const struct lp_sampler_dynamic_state *base,
-                   struct gallivm_state *gallivm,
-                   LLVMValueRef context_ptr,
-                   unsigned sampler_unit,
-                   unsigned member_index,
-                   const char *member_name,
-                   boolean emit_load)
-{
-   LLVMBuilderRef builder = gallivm->builder;
-   LLVMValueRef indices[4];
-   LLVMValueRef ptr;
-   LLVMValueRef res;
-
-   assert(sampler_unit < PIPE_MAX_SAMPLERS);
-
-   /* context[0] */
-   indices[0] = lp_build_const_int32(gallivm, 0);
-   /* context[0].samplers */
-   auto dynamic = (const struct swr_sampler_dynamic_state *)base;
-   switch (dynamic->shader_type) {
-   case PIPE_SHADER_FRAGMENT:
-      indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersFS);
-      break;
-   case PIPE_SHADER_VERTEX:
-      indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersVS);
-      break;
-   case PIPE_SHADER_GEOMETRY:
-      indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersGS);
-      break;
-   case PIPE_SHADER_TESS_CTRL:
-      indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersTCS);
-      break;
-   case PIPE_SHADER_TESS_EVAL:
-      indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersTES);
-      break;
-   default:
-      assert(0 && "unsupported shader type");
-      /* Never hand an uninitialized index to LLVMBuildGEP. */
-      return NULL;
-   }
-   /* context[0].samplers[unit] */
-   indices[2] = lp_build_const_int32(gallivm, sampler_unit);
-   /* context[0].samplers[unit].member */
-   indices[3] = lp_build_const_int32(gallivm, member_index);
-
-   ptr = LLVMBuildGEP(builder, context_ptr, indices, ARRAY_SIZE(indices), "");
-
-   if (emit_load)
-      res = LLVMBuildLoad(builder, ptr, "");
-   else
-      res = ptr;
-
-   lp_build_name(res, "context.sampler%u.%s", sampler_unit, member_name);
-
-   return res;
-}
-
-
-/*
- * Instantiate swr_sampler_<name>(), which forwards to swr_sampler_member()
- * with the field index swr_jit_sampler_<name> and the given emit_load flag.
- */
-#define SWR_SAMPLER_MEMBER(_name, _emit_load) \
- static LLVMValueRef swr_sampler_##_name( \
- const struct lp_sampler_dynamic_state *base, \
- struct gallivm_state *gallivm, \
- LLVMValueRef context_ptr, \
- unsigned sampler_unit) \
- { \
- return swr_sampler_member(base, \
- gallivm, \
- context_ptr, \
- sampler_unit, \
- swr_jit_sampler_##_name, \
- #_name, \
- _emit_load); \
- }
-
-
-/* TRUE: the accessor returns the loaded value; FALSE: the field address. */
-SWR_SAMPLER_MEMBER(min_lod, TRUE)
-SWR_SAMPLER_MEMBER(max_lod, TRUE)
-SWR_SAMPLER_MEMBER(lod_bias, TRUE)
-SWR_SAMPLER_MEMBER(border_color, FALSE)
-
-
-/*
- * Destroy callback installed by swr_sampler_soa_create(): frees the
- * sampler object it is handed.
- */
-static void
-swr_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
-{
- FREE(sampler);
-}
-
-
-/**
- * Emit code that fetches filtered values from a texture.
- *
- * The texture and sampler indices come from params; the static texture
- * state is looked up by texture index and the static sampler state by
- * sampler index in the same static-state array.
- * The 'texel' member of params returns four vectors corresponding to
- * R, G, B, A.
- */
-static void
-swr_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
-                                 struct gallivm_state *gallivm,
-                                 const struct lp_sampler_params *params)
-{
-   struct swr_sampler_soa *swr_sampler = (struct swr_sampler_soa *)base;
-   const unsigned samp_idx = params->sampler_index;
-   const unsigned tex_idx = params->texture_index;
-
-   assert(samp_idx < PIPE_MAX_SAMPLERS);
-   assert(tex_idx < PIPE_MAX_SHADER_SAMPLER_VIEWS);
-
-#if 0
-   lp_build_sample_nop(gallivm, params->type, params->coords, params->texel);
-#else
-   const struct swr_sampler_static_state *states =
-      swr_sampler->dynamic_state.static_state;
-
-   lp_build_sample_soa(&states[tex_idx].texture_state,
-                       &states[samp_idx].sampler_state,
-                       &swr_sampler->dynamic_state.base,
-                       gallivm,
-                       params);
-#endif
-}
-
-/**
- * Emit code that queries the size of the texture bound to
- * params->texture_unit, using that unit's static texture state.
- */
-static void
-swr_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base,
-                                struct gallivm_state *gallivm,
-                                const struct lp_sampler_size_query_params *params)
-{
-   struct swr_sampler_soa *swr_sampler = (struct swr_sampler_soa *)base;
-
-   assert(params->texture_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS);
-
-   const struct swr_sampler_static_state *unit_state =
-      &swr_sampler->dynamic_state.static_state[params->texture_unit];
-
-   lp_build_size_query_soa(gallivm,
-                           &unit_state->texture_state,
-                           &swr_sampler->dynamic_state.base,
-                           params);
-}
-
-
-/**
- * Create the sampler bridge object for one shader stage.
- *
- * Installs the destroy/emit callbacks and the texture and sampler field
- * accessors, and records the static state array and the shader stage.
- * Returns a pointer to the embedded base, or NULL if allocation fails.
- */
-struct lp_build_sampler_soa *
-swr_sampler_soa_create(const struct swr_sampler_static_state *static_state,
-                       enum pipe_shader_type shader_type)
-{
-   struct swr_sampler_soa *soa = CALLOC_STRUCT(swr_sampler_soa);
-   if (!soa)
-      return NULL;
-
-   /* Entry points used by the TGSI translator. */
-   soa->base.destroy = swr_sampler_soa_destroy;
-   soa->base.emit_tex_sample = swr_sampler_soa_emit_fetch_texel;
-   soa->base.emit_size_query = swr_sampler_soa_emit_size_query;
-
-   struct swr_sampler_dynamic_state *dyn = &soa->dynamic_state;
-
-   /* Texture field accessors. */
-   dyn->base.width = swr_texture_width;
-   dyn->base.height = swr_texture_height;
-   dyn->base.depth = swr_texture_depth;
-   dyn->base.first_level = swr_texture_first_level;
-   dyn->base.last_level = swr_texture_last_level;
-   dyn->base.base_ptr = swr_texture_base_ptr;
-   dyn->base.row_stride = swr_texture_row_stride;
-   dyn->base.img_stride = swr_texture_img_stride;
-   dyn->base.mip_offsets = swr_texture_mip_offsets;
-   dyn->base.num_samples = swr_texture_num_samples;
-   dyn->base.sample_stride = swr_texture_sample_stride;
-
-   /* Sampler field accessors. */
-   dyn->base.min_lod = swr_sampler_min_lod;
-   dyn->base.max_lod = swr_sampler_max_lod;
-   dyn->base.lod_bias = swr_sampler_lod_bias;
-   dyn->base.border_color = swr_sampler_border_color;
-
-   dyn->static_state = static_state;
-   dyn->shader_type = shader_type;
-
-   return &soa->base;
-}
diff --git a/src/gallium/drivers/swr/swr_tex_sample.h b/src/gallium/drivers/swr/swr_tex_sample.h
deleted file mode 100644
index 715ca3c3e19..00000000000
--- a/src/gallium/drivers/swr/swr_tex_sample.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#pragma once
-
-#include "gallivm/lp_bld.h"
-
-/*
- * Static sampler state and static texture state for one unit, stored
- * side by side.
- */
-struct swr_sampler_static_state {
- /*
- * These attributes are effectively interleaved for more sane key handling.
- * However, there might be lots of null space if the number of samplers and
- * textures isn't the same.
- */
- struct lp_static_sampler_state sampler_state;
- struct lp_static_texture_state texture_state;
-};
-
-/**
- * Pure-LLVM texture sampling code generator.
- *
- * \param key          array of per-unit static sampler/texture state
- * \param shader_type  shader stage the generator is created for
- * \return the new sampler object
- */
-struct lp_build_sampler_soa *
-swr_sampler_soa_create(const struct swr_sampler_static_state *key,
- enum pipe_shader_type shader_type);