summaryrefslogtreecommitdiff
path: root/src/freedreno/decode/instr-a3xx.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/freedreno/decode/instr-a3xx.h')
-rw-r--r--src/freedreno/decode/instr-a3xx.h1115
1 files changed, 1115 insertions, 0 deletions
diff --git a/src/freedreno/decode/instr-a3xx.h b/src/freedreno/decode/instr-a3xx.h
new file mode 100644
index 00000000000..218bdc3e17c
--- /dev/null
+++ b/src/freedreno/decode/instr-a3xx.h
@@ -0,0 +1,1115 @@
+/*
+ * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef INSTR_A3XX_H_
+#define INSTR_A3XX_H_
+
+#define PACKED __attribute__((__packed__))
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <assert.h>
+
+void ir3_assert_handler(const char *expr, const char *file, int line,
+ const char *func) __attribute__((weak)) __attribute__ ((__noreturn__));
+
+/* A wrapper for assert() that allows overriding handling of a failed
+ * assert. This is needed for tools like crashdec which can want to
+ * attempt to disassemble memory that might not actually be valid
+ * instructions.
+ */
+#define ir3_assert(expr) do { \
+ if (!(expr)) { \
+ if (ir3_assert_handler) { \
+ ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
+ } \
+ assert(expr); \
+ } \
+ } while (0)
+
+/* size of largest OPC field of all the instruction categories: */
+#define NOPC_BITS 6
+
+#define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc)
+
+typedef enum {
+ /* category 0: */
+ OPC_NOP = _OPC(0, 0),
+ OPC_B = _OPC(0, 1),
+ OPC_JUMP = _OPC(0, 2),
+ OPC_CALL = _OPC(0, 3),
+ OPC_RET = _OPC(0, 4),
+ OPC_KILL = _OPC(0, 5),
+ OPC_END = _OPC(0, 6),
+ OPC_EMIT = _OPC(0, 7),
+ OPC_CUT = _OPC(0, 8),
+ OPC_CHMASK = _OPC(0, 9),
+ OPC_CHSH = _OPC(0, 10),
+ OPC_FLOW_REV = _OPC(0, 11),
+
+ OPC_BKT = _OPC(0, 16),
+ OPC_STKS = _OPC(0, 17),
+ OPC_STKR = _OPC(0, 18),
+ OPC_XSET = _OPC(0, 19),
+ OPC_XCLR = _OPC(0, 20),
+ OPC_GETONE = _OPC(0, 21),
+ OPC_DBG = _OPC(0, 22),
+ OPC_SHPS = _OPC(0, 23), /* shader prologue start */
+ OPC_SHPE = _OPC(0, 24), /* shader prologue end */
+
+ OPC_PREDT = _OPC(0, 29), /* predicated true */
+ OPC_PREDF = _OPC(0, 30), /* predicated false */
+ OPC_PREDE = _OPC(0, 31), /* predicated end */
+
+ /* category 1: */
+ OPC_MOV = _OPC(1, 0),
+
+ /* category 2: */
+ OPC_ADD_F = _OPC(2, 0),
+ OPC_MIN_F = _OPC(2, 1),
+ OPC_MAX_F = _OPC(2, 2),
+ OPC_MUL_F = _OPC(2, 3),
+ OPC_SIGN_F = _OPC(2, 4),
+ OPC_CMPS_F = _OPC(2, 5),
+ OPC_ABSNEG_F = _OPC(2, 6),
+ OPC_CMPV_F = _OPC(2, 7),
+ /* 8 - invalid */
+ OPC_FLOOR_F = _OPC(2, 9),
+ OPC_CEIL_F = _OPC(2, 10),
+ OPC_RNDNE_F = _OPC(2, 11),
+ OPC_RNDAZ_F = _OPC(2, 12),
+ OPC_TRUNC_F = _OPC(2, 13),
+ /* 14-15 - invalid */
+ OPC_ADD_U = _OPC(2, 16),
+ OPC_ADD_S = _OPC(2, 17),
+ OPC_SUB_U = _OPC(2, 18),
+ OPC_SUB_S = _OPC(2, 19),
+ OPC_CMPS_U = _OPC(2, 20),
+ OPC_CMPS_S = _OPC(2, 21),
+ OPC_MIN_U = _OPC(2, 22),
+ OPC_MIN_S = _OPC(2, 23),
+ OPC_MAX_U = _OPC(2, 24),
+ OPC_MAX_S = _OPC(2, 25),
+ OPC_ABSNEG_S = _OPC(2, 26),
+ /* 27 - invalid */
+ OPC_AND_B = _OPC(2, 28),
+ OPC_OR_B = _OPC(2, 29),
+ OPC_NOT_B = _OPC(2, 30),
+ OPC_XOR_B = _OPC(2, 31),
+ /* 32 - invalid */
+ OPC_CMPV_U = _OPC(2, 33),
+ OPC_CMPV_S = _OPC(2, 34),
+ /* 35-47 - invalid */
+ OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */
+ OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
+ OPC_MULL_U = _OPC(2, 50),
+ OPC_BFREV_B = _OPC(2, 51),
+ OPC_CLZ_S = _OPC(2, 52),
+ OPC_CLZ_B = _OPC(2, 53),
+ OPC_SHL_B = _OPC(2, 54),
+ OPC_SHR_B = _OPC(2, 55),
+ OPC_ASHR_B = _OPC(2, 56),
+ OPC_BARY_F = _OPC(2, 57),
+ OPC_MGEN_B = _OPC(2, 58),
+ OPC_GETBIT_B = _OPC(2, 59),
+ OPC_SETRM = _OPC(2, 60),
+ OPC_CBITS_B = _OPC(2, 61),
+ OPC_SHB = _OPC(2, 62),
+ OPC_MSAD = _OPC(2, 63),
+
+ /* category 3: */
+ OPC_MAD_U16 = _OPC(3, 0),
+ OPC_MADSH_U16 = _OPC(3, 1),
+ OPC_MAD_S16 = _OPC(3, 2),
+ OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */
+ OPC_MAD_U24 = _OPC(3, 4),
+ OPC_MAD_S24 = _OPC(3, 5),
+ OPC_MAD_F16 = _OPC(3, 6),
+ OPC_MAD_F32 = _OPC(3, 7),
+ OPC_SEL_B16 = _OPC(3, 8),
+ OPC_SEL_B32 = _OPC(3, 9),
+ OPC_SEL_S16 = _OPC(3, 10),
+ OPC_SEL_S32 = _OPC(3, 11),
+ OPC_SEL_F16 = _OPC(3, 12),
+ OPC_SEL_F32 = _OPC(3, 13),
+ OPC_SAD_S16 = _OPC(3, 14),
+ OPC_SAD_S32 = _OPC(3, 15),
+
+ /* category 4: */
+ OPC_RCP = _OPC(4, 0),
+ OPC_RSQ = _OPC(4, 1),
+ OPC_LOG2 = _OPC(4, 2),
+ OPC_EXP2 = _OPC(4, 3),
+ OPC_SIN = _OPC(4, 4),
+ OPC_COS = _OPC(4, 5),
+ OPC_SQRT = _OPC(4, 6),
+ /* NOTE that these are 8+opc from their highp equivs, so it's possible
+ * that the high order bit in the opc field has been repurposed for
+ * half-precision use? But note that other ops (rcp/lsin/cos/sqrt)
+ * still use the same opc as highp
+ */
+ OPC_HRSQ = _OPC(4, 9),
+ OPC_HLOG2 = _OPC(4, 10),
+ OPC_HEXP2 = _OPC(4, 11),
+
+ /* category 5: */
+ OPC_ISAM = _OPC(5, 0),
+ OPC_ISAML = _OPC(5, 1),
+ OPC_ISAMM = _OPC(5, 2),
+ OPC_SAM = _OPC(5, 3),
+ OPC_SAMB = _OPC(5, 4),
+ OPC_SAML = _OPC(5, 5),
+ OPC_SAMGQ = _OPC(5, 6),
+ OPC_GETLOD = _OPC(5, 7),
+ OPC_CONV = _OPC(5, 8),
+ OPC_CONVM = _OPC(5, 9),
+ OPC_GETSIZE = _OPC(5, 10),
+ OPC_GETBUF = _OPC(5, 11),
+ OPC_GETPOS = _OPC(5, 12),
+ OPC_GETINFO = _OPC(5, 13),
+ OPC_DSX = _OPC(5, 14),
+ OPC_DSY = _OPC(5, 15),
+ OPC_GATHER4R = _OPC(5, 16),
+ OPC_GATHER4G = _OPC(5, 17),
+ OPC_GATHER4B = _OPC(5, 18),
+ OPC_GATHER4A = _OPC(5, 19),
+ OPC_SAMGP0 = _OPC(5, 20),
+ OPC_SAMGP1 = _OPC(5, 21),
+ OPC_SAMGP2 = _OPC(5, 22),
+ OPC_SAMGP3 = _OPC(5, 23),
+ OPC_DSXPP_1 = _OPC(5, 24),
+ OPC_DSYPP_1 = _OPC(5, 25),
+ OPC_RGETPOS = _OPC(5, 26),
+ OPC_RGETINFO = _OPC(5, 27),
+
+ /* category 6: */
+ OPC_LDG = _OPC(6, 0), /* load-global */
+ OPC_LDL = _OPC(6, 1),
+ OPC_LDP = _OPC(6, 2),
+ OPC_STG = _OPC(6, 3), /* store-global */
+ OPC_STL = _OPC(6, 4),
+ OPC_STP = _OPC(6, 5),
+ OPC_LDIB = _OPC(6, 6),
+ OPC_G2L = _OPC(6, 7),
+ OPC_L2G = _OPC(6, 8),
+ OPC_PREFETCH = _OPC(6, 9),
+ OPC_LDLW = _OPC(6, 10),
+ OPC_STLW = _OPC(6, 11),
+ OPC_RESFMT = _OPC(6, 14),
+ OPC_RESINFO = _OPC(6, 15),
+ OPC_ATOMIC_ADD = _OPC(6, 16),
+ OPC_ATOMIC_SUB = _OPC(6, 17),
+ OPC_ATOMIC_XCHG = _OPC(6, 18),
+ OPC_ATOMIC_INC = _OPC(6, 19),
+ OPC_ATOMIC_DEC = _OPC(6, 20),
+ OPC_ATOMIC_CMPXCHG = _OPC(6, 21),
+ OPC_ATOMIC_MIN = _OPC(6, 22),
+ OPC_ATOMIC_MAX = _OPC(6, 23),
+ OPC_ATOMIC_AND = _OPC(6, 24),
+ OPC_ATOMIC_OR = _OPC(6, 25),
+ OPC_ATOMIC_XOR = _OPC(6, 26),
+ OPC_LDGB = _OPC(6, 27),
+ OPC_STGB = _OPC(6, 28),
+ OPC_STIB = _OPC(6, 29),
+ OPC_LDC = _OPC(6, 30),
+ OPC_LDLV = _OPC(6, 31),
+
+ /* category 7: */
+ OPC_BAR = _OPC(7, 0),
+ OPC_FENCE = _OPC(7, 1),
+} opc_t;
+
+#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
+#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
+
+typedef enum {
+ TYPE_F16 = 0,
+ TYPE_F32 = 1,
+ TYPE_U16 = 2,
+ TYPE_U32 = 3,
+ TYPE_S16 = 4,
+ TYPE_S32 = 5,
+ TYPE_U8 = 6,
+ TYPE_S8 = 7, // XXX I assume?
+} type_t;
+
+static inline uint32_t type_size(type_t type)
+{
+ switch (type) {
+ case TYPE_F32:
+ case TYPE_U32:
+ case TYPE_S32:
+ return 32;
+ case TYPE_F16:
+ case TYPE_U16:
+ case TYPE_S16:
+ return 16;
+ case TYPE_U8:
+ case TYPE_S8:
+ return 8;
+ default:
+ ir3_assert(0); /* invalid type */
+ return 0;
+ }
+}
+
+static inline int type_float(type_t type)
+{
+ return (type == TYPE_F32) || (type == TYPE_F16);
+}
+
+static inline int type_uint(type_t type)
+{
+ return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
+}
+
+static inline int type_sint(type_t type)
+{
+ return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
+}
+
+typedef union PACKED {
+ /* normal gpr or const src register: */
+ struct PACKED {
+ uint32_t comp : 2;
+ uint32_t num : 10;
+ };
+ /* for immediate val: */
+ int32_t iim_val : 11;
+ /* to make compiler happy: */
+ uint32_t dummy32;
+ uint32_t dummy10 : 10;
+ int32_t idummy10 : 10;
+ uint32_t dummy11 : 11;
+ uint32_t dummy12 : 12;
+ uint32_t dummy13 : 13;
+ uint32_t dummy8 : 8;
+ int32_t idummy13 : 13;
+ int32_t idummy8 : 8;
+} reg_t;
+
+/* special registers: */
+#define REG_A0 61 /* address register */
+#define REG_P0 62 /* predicate register */
+
+static inline int reg_special(reg_t reg)
+{
+ return (reg.num == REG_A0) || (reg.num == REG_P0);
+}
+
+typedef enum {
+ BRANCH_PLAIN = 0, /* br */
+ BRANCH_OR = 1, /* brao */
+ BRANCH_AND = 2, /* braa */
+ BRANCH_CONST = 3, /* brac */
+ BRANCH_ANY = 4, /* bany */
+ BRANCH_ALL = 5, /* ball */
+ BRANCH_X = 6, /* brax ??? */
+} brtype_t;
+
+typedef struct PACKED {
+ /* dword0: */
+ union PACKED {
+ struct PACKED {
+ int16_t immed : 16;
+ uint32_t dummy1 : 16;
+ } a3xx;
+ struct PACKED {
+ int32_t immed : 20;
+ uint32_t dummy1 : 12;
+ } a4xx;
+ struct PACKED {
+ int32_t immed : 32;
+ } a5xx;
+ };
+
+ /* dword1: */
+ uint32_t idx : 5; /* brac.N index */
+ uint32_t brtype : 3; /* branch type, see brtype_t */
+ uint32_t repeat : 3;
+ uint32_t dummy3 : 1;
+ uint32_t ss : 1;
+ uint32_t inv1 : 1;
+ uint32_t comp1 : 2;
+ uint32_t eq : 1;
+ uint32_t opc_hi : 1; /* at least one bit */
+ uint32_t dummy4 : 2;
+ uint32_t inv0 : 1;
+ uint32_t comp0 : 2; /* component for first src */
+ uint32_t opc : 4;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat0_t;
+
+typedef struct PACKED {
+ /* dword0: */
+ union PACKED {
+ /* for normal src register: */
+ struct PACKED {
+ uint32_t src : 11;
+ /* at least low bit of pad must be zero or it will
+ * look like a address relative src
+ */
+ uint32_t pad : 21;
+ };
+ /* for address relative: */
+ struct PACKED {
+ int32_t off : 10;
+ uint32_t src_rel_c : 1;
+ uint32_t src_rel : 1;
+ uint32_t unknown : 20;
+ };
+ /* for immediate: */
+ int32_t iim_val;
+ uint32_t uim_val;
+ float fim_val;
+ };
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t repeat : 3;
+ uint32_t src_r : 1;
+ uint32_t ss : 1;
+ uint32_t ul : 1;
+ uint32_t dst_type : 3;
+ uint32_t dst_rel : 1;
+ uint32_t src_type : 3;
+ uint32_t src_c : 1;
+ uint32_t src_im : 1;
+ uint32_t even : 1;
+ uint32_t pos_inf : 1;
+ uint32_t must_be_0 : 2;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat1_t;
+
+typedef struct PACKED {
+ /* dword0: */
+ union PACKED {
+ struct PACKED {
+ uint32_t src1 : 11;
+ uint32_t must_be_zero1: 2;
+ uint32_t src1_im : 1; /* immediate */
+ uint32_t src1_neg : 1; /* negate */
+ uint32_t src1_abs : 1; /* absolute value */
+ };
+ struct PACKED {
+ uint32_t src1 : 10;
+ uint32_t src1_c : 1; /* relative-const */
+ uint32_t src1_rel : 1; /* relative address */
+ uint32_t must_be_zero : 1;
+ uint32_t dummy : 3;
+ } rel1;
+ struct PACKED {
+ uint32_t src1 : 12;
+ uint32_t src1_c : 1; /* const */
+ uint32_t dummy : 3;
+ } c1;
+ };
+
+ union PACKED {
+ struct PACKED {
+ uint32_t src2 : 11;
+ uint32_t must_be_zero2: 2;
+ uint32_t src2_im : 1; /* immediate */
+ uint32_t src2_neg : 1; /* negate */
+ uint32_t src2_abs : 1; /* absolute value */
+ };
+ struct PACKED {
+ uint32_t src2 : 10;
+ uint32_t src2_c : 1; /* relative-const */
+ uint32_t src2_rel : 1; /* relative address */
+ uint32_t must_be_zero : 1;
+ uint32_t dummy : 3;
+ } rel2;
+ struct PACKED {
+ uint32_t src2 : 12;
+ uint32_t src2_c : 1; /* const */
+ uint32_t dummy : 3;
+ } c2;
+ };
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t repeat : 2;
+ uint32_t sat : 1;
+ uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
+ uint32_t ss : 1;
+ uint32_t ul : 1; /* dunno */
+ uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
+ uint32_t ei : 1;
+ uint32_t cond : 3;
+ uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
+ uint32_t full : 1; /* not half */
+ uint32_t opc : 6;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat2_t;
+
+typedef struct PACKED {
+ /* dword0: */
+ union PACKED {
+ struct PACKED {
+ uint32_t src1 : 11;
+ uint32_t must_be_zero1: 2;
+ uint32_t src2_c : 1;
+ uint32_t src1_neg : 1;
+ uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
+ };
+ struct PACKED {
+ uint32_t src1 : 10;
+ uint32_t src1_c : 1;
+ uint32_t src1_rel : 1;
+ uint32_t must_be_zero : 1;
+ uint32_t dummy : 3;
+ } rel1;
+ struct PACKED {
+ uint32_t src1 : 12;
+ uint32_t src1_c : 1;
+ uint32_t dummy : 3;
+ } c1;
+ };
+
+ union PACKED {
+ struct PACKED {
+ uint32_t src3 : 11;
+ uint32_t must_be_zero2: 2;
+ uint32_t src3_r : 1;
+ uint32_t src2_neg : 1;
+ uint32_t src3_neg : 1;
+ };
+ struct PACKED {
+ uint32_t src3 : 10;
+ uint32_t src3_c : 1;
+ uint32_t src3_rel : 1;
+ uint32_t must_be_zero : 1;
+ uint32_t dummy : 3;
+ } rel2;
+ struct PACKED {
+ uint32_t src3 : 12;
+ uint32_t src3_c : 1;
+ uint32_t dummy : 3;
+ } c2;
+ };
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t repeat : 2;
+ uint32_t sat : 1;
+ uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
+ uint32_t ss : 1;
+ uint32_t ul : 1;
+ uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
+ uint32_t src2 : 8;
+ uint32_t opc : 4;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat3_t;
+
+static inline bool instr_cat3_full(instr_cat3_t *cat3)
+{
+ switch (_OPC(3, cat3->opc)) {
+ case OPC_MAD_F16:
+ case OPC_MAD_U16:
+ case OPC_MAD_S16:
+ case OPC_SEL_B16:
+ case OPC_SEL_S16:
+ case OPC_SEL_F16:
+ case OPC_SAD_S16:
+ case OPC_SAD_S32: // really??
+ return false;
+ default:
+ return true;
+ }
+}
+
+typedef struct PACKED {
+ /* dword0: */
+ union PACKED {
+ struct PACKED {
+ uint32_t src : 11;
+ uint32_t must_be_zero1: 2;
+ uint32_t src_im : 1; /* immediate */
+ uint32_t src_neg : 1; /* negate */
+ uint32_t src_abs : 1; /* absolute value */
+ };
+ struct PACKED {
+ uint32_t src : 10;
+ uint32_t src_c : 1; /* relative-const */
+ uint32_t src_rel : 1; /* relative address */
+ uint32_t must_be_zero : 1;
+ uint32_t dummy : 3;
+ } rel;
+ struct PACKED {
+ uint32_t src : 12;
+ uint32_t src_c : 1; /* const */
+ uint32_t dummy : 3;
+ } c;
+ };
+ uint32_t dummy1 : 16; /* seem to be ignored */
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t repeat : 2;
+ uint32_t sat : 1;
+ uint32_t src_r : 1;
+ uint32_t ss : 1;
+ uint32_t ul : 1;
+ uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
+ uint32_t dummy2 : 5; /* seem to be ignored */
+ uint32_t full : 1; /* not half */
+ uint32_t opc : 6;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat4_t;
+
+/* With is_bindless_s2en = 1, this determines whether bindless is enabled and
+ * if so, how to get the (base, index) pair for both sampler and texture.
+ * There is a single base embedded in the instruction, which is always used
+ * for the texture.
+ */
+typedef enum {
+ /* Use traditional GL binding model, get texture and sampler index
+ * from src3 which is not presumed to be uniform. This is
+ * backwards-compatible with earlier generations, where this field was
+ * always 0 and nonuniform-indexed sampling always worked.
+ */
+ CAT5_NONUNIFORM = 0,
+
+ /* The sampler base comes from the low 3 bits of a1.x, and the sampler
+ * and texture index come from src3 which is presumed to be uniform.
+ */
+ CAT5_BINDLESS_A1_UNIFORM = 1,
+
+ /* The texture and sampler share the same base, and the sampler and
+ * texture index come from src3 which is *not* presumed to be uniform.
+ */
+ CAT5_BINDLESS_NONUNIFORM = 2,
+
+ /* The sampler base comes from the low 3 bits of a1.x, and the sampler
+ * and texture index come from src3 which is *not* presumed to be
+ * uniform.
+ */
+ CAT5_BINDLESS_A1_NONUNIFORM = 3,
+
+ /* Use traditional GL binding model, get texture and sampler index
+ * from src3 which is presumed to be uniform.
+ */
+ CAT5_UNIFORM = 4,
+
+ /* The texture and sampler share the same base, and the sampler and
+ * texture index come from src3 which is presumed to be uniform.
+ */
+ CAT5_BINDLESS_UNIFORM = 5,
+
+ /* The texture and sampler share the same base, get sampler index from low
+ * 4 bits of src3 and texture index from high 4 bits.
+ */
+ CAT5_BINDLESS_IMM = 6,
+
+ /* The sampler base comes from the low 3 bits of a1.x, and the texture
+ * index comes from the next 8 bits of a1.x. The sampler index is an
+ * immediate in src3.
+ */
+ CAT5_BINDLESS_A1_IMM = 7,
+} cat5_desc_mode_t;
+
+typedef struct PACKED {
+ /* dword0: */
+ union PACKED {
+ /* normal case: */
+ struct PACKED {
+ uint32_t full : 1; /* not half */
+ uint32_t src1 : 8;
+ uint32_t src2 : 8;
+ uint32_t dummy1 : 4; /* seem to be ignored */
+ uint32_t samp : 4;
+ uint32_t tex : 7;
+ } norm;
+ /* s2en case: */
+ struct PACKED {
+ uint32_t full : 1; /* not half */
+ uint32_t src1 : 8;
+ uint32_t src2 : 8;
+ uint32_t dummy1 : 2;
+ uint32_t base_hi : 2;
+ uint32_t src3 : 8;
+ uint32_t desc_mode : 3;
+ } s2en_bindless;
+ /* same in either case: */
+ // XXX I think, confirm this
+ struct PACKED {
+ uint32_t full : 1; /* not half */
+ uint32_t src1 : 8;
+ uint32_t src2 : 8;
+ uint32_t pad : 15;
+ };
+ };
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t wrmask : 4; /* write-mask */
+ uint32_t type : 3;
+ uint32_t base_lo : 1; /* used with bindless */
+ uint32_t is_3d : 1;
+
+ uint32_t is_a : 1;
+ uint32_t is_s : 1;
+ uint32_t is_s2en_bindless : 1;
+ uint32_t is_o : 1;
+ uint32_t is_p : 1;
+
+ uint32_t opc : 5;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat5_t;
+
+/* dword0 encoding for src_off: [src1 + off], src2: */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t mustbe1 : 1;
+ int32_t off : 13;
+ uint32_t src1 : 8;
+ uint32_t src1_im : 1;
+ uint32_t src2_im : 1;
+ uint32_t src2 : 8;
+
+ /* dword1: */
+ uint32_t dword1;
+} instr_cat6a_t;
+
+/* dword0 encoding for !src_off: [src1], src2 */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t mustbe0 : 1;
+ uint32_t src1 : 13;
+ uint32_t ignore0 : 8;
+ uint32_t src1_im : 1;
+ uint32_t src2_im : 1;
+ uint32_t src2 : 8;
+
+ /* dword1: */
+ uint32_t dword1;
+} instr_cat6b_t;
+
+/* dword1 encoding for dst_off: */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t dword0;
+
+ /* note: there is some weird stuff going on where sometimes
+ * cat6->a.off is involved.. but that seems like a bug in
+ * the blob, since it is used even if !cat6->src_off
+ * It would make sense for there to be some more bits to
+ * bring us to 11 bits worth of offset, but not sure..
+ */
+ int32_t off : 8;
+ uint32_t mustbe1 : 1;
+ uint32_t dst : 8;
+ uint32_t pad1 : 15;
+} instr_cat6c_t;
+
+/* dword1 encoding for !dst_off: */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t dword0;
+
+ uint32_t dst : 8;
+ uint32_t mustbe0 : 1;
+ uint32_t idx : 8;
+ uint32_t pad0 : 15;
+} instr_cat6d_t;
+
+/* ldgb and atomics..
+ *
+ * ldgb: pad0=0, pad3=1
+ * atomic .g: pad0=1, pad3=1
+ * .l: pad0=1, pad3=0
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t pad0 : 1;
+ uint32_t src3 : 8;
+ uint32_t d : 2;
+ uint32_t typed : 1;
+ uint32_t type_size : 2;
+ uint32_t src1 : 8;
+ uint32_t src1_im : 1;
+ uint32_t src2_im : 1;
+ uint32_t src2 : 8;
+
+ /* dword1: */
+ uint32_t dst : 8;
+ uint32_t mustbe0 : 1;
+ uint32_t src_ssbo : 8;
+ uint32_t pad2 : 3; // type
+ uint32_t g : 1;
+ uint32_t pad3 : 1;
+ uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat
+} instr_cat6ldgb_t;
+
+/* stgb, pad0=0, pad3=2
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t mustbe1 : 1; // ???
+ uint32_t src1 : 8;
+ uint32_t d : 2;
+ uint32_t typed : 1;
+ uint32_t type_size : 2;
+ uint32_t pad0 : 9;
+ uint32_t src2_im : 1;
+ uint32_t src2 : 8;
+
+ /* dword1: */
+ uint32_t src3 : 8;
+ uint32_t src3_im : 1;
+ uint32_t dst_ssbo : 8;
+ uint32_t pad2 : 3; // type
+ uint32_t pad3 : 2;
+ uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat
+} instr_cat6stgb_t;
+
+typedef union PACKED {
+ instr_cat6a_t a;
+ instr_cat6b_t b;
+ instr_cat6c_t c;
+ instr_cat6d_t d;
+ instr_cat6ldgb_t ldgb;
+ instr_cat6stgb_t stgb;
+ struct PACKED {
+ /* dword0: */
+ uint32_t src_off : 1;
+ uint32_t pad1 : 31;
+
+ /* dword1: */
+ uint32_t pad2 : 8;
+ uint32_t dst_off : 1;
+ uint32_t pad3 : 8;
+ uint32_t type : 3;
+ uint32_t g : 1; /* or in some cases it means dst immed */
+ uint32_t pad4 : 1;
+ uint32_t opc : 5;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+ };
+} instr_cat6_t;
+
+/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
+ */
+typedef enum {
+ /* Use old GL binding model with an immediate index. */
+ CAT6_IMM = 0,
+
+ CAT6_UNIFORM = 1,
+
+ CAT6_NONUNIFORM = 2,
+
+ /* Use the bindless model, with an immediate index.
+ */
+ CAT6_BINDLESS_IMM = 4,
+
+ /* Use the bindless model, with a uniform register index.
+ */
+ CAT6_BINDLESS_UNIFORM = 5,
+
+ /* Use the bindless model, with a register index that isn't guaranteed
+ * to be uniform. This presumably checks if the indices are equal and
+ * splits up the load/store, because it works the way you would
+ * expect.
+ */
+ CAT6_BINDLESS_NONUNIFORM = 6,
+} cat6_desc_mode_t;
+
+/**
+ * For atomic ops (which return a value):
+ *
+ * pad1=1, pad3=c, pad5=3
+ * src1 - vecN offset/coords
+ * src2.x - is actually dest register
+ * src2.y - is 'data' except for cmpxchg where src2.y is 'compare'
+ * and src2.z is 'data'
+ *
+ * For stib (which does not return a value):
+ * pad1=0, pad3=c, pad5=2
+ * src1 - vecN offset/coords
+ * src2 - value to store
+ *
+ * For ldib:
+ * pad1=1, pad3=c, pad5=2
+ * src1 - vecN offset/coords
+ *
+ * for ldc (load from UBO using descriptor):
+ * pad1=0, pad3=8, pad5=2
+ *
+ * pad2 and pad5 are only observed to be 0.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t pad1 : 1;
+ uint32_t base : 3;
+ uint32_t pad2 : 2;
+ uint32_t desc_mode : 3;
+ uint32_t d : 2;
+ uint32_t typed : 1;
+ uint32_t type_size : 2;
+ uint32_t opc : 5;
+ uint32_t pad3 : 5;
+ uint32_t src1 : 8; /* coordinate/offset */
+
+ /* dword1: */
+ uint32_t src2 : 8; /* or the dst for load instructions */
+ uint32_t pad4 : 1; //mustbe0 ??
+ uint32_t ssbo : 8; /* ssbo/image binding point */
+ uint32_t type : 3;
+ uint32_t pad5 : 7;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+} instr_cat6_a6xx_t;
+
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t pad1 : 32;
+
+ /* dword1: */
+ uint32_t pad2 : 12;
+ uint32_t ss : 1; /* maybe in the encoding, but blob only uses (sy) */
+ uint32_t pad3 : 6;
+ uint32_t w : 1; /* write */
+ uint32_t r : 1; /* read */
+ uint32_t l : 1; /* local */
+ uint32_t g : 1; /* global */
+ uint32_t opc : 4; /* presumed, but only a couple known OPCs */
+ uint32_t jmp_tgt : 1; /* (jp) */
+ uint32_t sync : 1; /* (sy) */
+ uint32_t opc_cat : 3;
+} instr_cat7_t;
+
+typedef union PACKED {
+ instr_cat0_t cat0;
+ instr_cat1_t cat1;
+ instr_cat2_t cat2;
+ instr_cat3_t cat3;
+ instr_cat4_t cat4;
+ instr_cat5_t cat5;
+ instr_cat6_t cat6;
+ instr_cat6_a6xx_t cat6_a6xx;
+ instr_cat7_t cat7;
+ struct PACKED {
+ /* dword0: */
+ uint32_t pad1 : 32;
+
+ /* dword1: */
+ uint32_t pad2 : 12;
+ uint32_t ss : 1; /* cat1-cat4 (cat0??) and cat7 (?) */
+ uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
+ uint32_t pad3 : 13;
+ uint32_t jmp_tgt : 1;
+ uint32_t sync : 1;
+ uint32_t opc_cat : 3;
+
+ };
+} instr_t;
+
+static inline uint32_t instr_repeat(instr_t *instr)
+{
+ switch (instr->opc_cat) {
+ case 0: return instr->cat0.repeat;
+ case 1: return instr->cat1.repeat;
+ case 2: return instr->cat2.repeat;
+ case 3: return instr->cat3.repeat;
+ case 4: return instr->cat4.repeat;
+ default: return 0;
+ }
+}
+
+static inline bool instr_sat(instr_t *instr)
+{
+ switch (instr->opc_cat) {
+ case 2: return instr->cat2.sat;
+ case 3: return instr->cat3.sat;
+ case 4: return instr->cat4.sat;
+ default: return false;
+ }
+}
+
+/* We can probably drop the gpu_id arg, but keeping it for now so we can
+ * assert if we see something we think should be new encoding on an older
+ * gpu.
+ */
+static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
+{
+ instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
+
+ /* At least one of these two bits is pad in all the possible
+ * "legacy" cat6 encodings, and a analysis of all the pre-a6xx
+ * cmdstream traces I have indicates that the pad bit is zero
+ * in all cases. So we can use this to detect new encoding:
+ */
+ if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
+ ir3_assert(gpu_id >= 600);
+ ir3_assert(instr->cat6.opc == 0);
+ return false;
+ }
+
+ return true;
+}
+
+static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id)
+{
+ switch (instr->opc_cat) {
+ case 0: return instr->cat0.opc | instr->cat0.opc_hi << 4;
+ case 1: return 0;
+ case 2: return instr->cat2.opc;
+ case 3: return instr->cat3.opc;
+ case 4: return instr->cat4.opc;
+ case 5: return instr->cat5.opc;
+ case 6:
+ if (!is_cat6_legacy(instr, gpu_id))
+ return instr->cat6_a6xx.opc;
+ return instr->cat6.opc;
+ case 7: return instr->cat7.opc;
+ default: return 0;
+ }
+}
+
+static inline bool is_mad(opc_t opc)
+{
+ switch (opc) {
+ case OPC_MAD_U16:
+ case OPC_MAD_S16:
+ case OPC_MAD_U24:
+ case OPC_MAD_S24:
+ case OPC_MAD_F16:
+ case OPC_MAD_F32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool is_madsh(opc_t opc)
+{
+ switch (opc) {
+ case OPC_MADSH_U16:
+ case OPC_MADSH_M16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool is_atomic(opc_t opc)
+{
+ switch (opc) {
+ case OPC_ATOMIC_ADD:
+ case OPC_ATOMIC_SUB:
+ case OPC_ATOMIC_XCHG:
+ case OPC_ATOMIC_INC:
+ case OPC_ATOMIC_DEC:
+ case OPC_ATOMIC_CMPXCHG:
+ case OPC_ATOMIC_MIN:
+ case OPC_ATOMIC_MAX:
+ case OPC_ATOMIC_AND:
+ case OPC_ATOMIC_OR:
+ case OPC_ATOMIC_XOR:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool is_ssbo(opc_t opc)
+{
+ switch (opc) {
+ case OPC_RESFMT:
+ case OPC_RESINFO:
+ case OPC_LDGB:
+ case OPC_STGB:
+ case OPC_STIB:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool is_isam(opc_t opc)
+{
+ switch (opc) {
+ case OPC_ISAM:
+ case OPC_ISAML:
+ case OPC_ISAMM:
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+static inline bool is_cat2_float(opc_t opc)
+{
+ switch (opc) {
+ case OPC_ADD_F:
+ case OPC_MIN_F:
+ case OPC_MAX_F:
+ case OPC_MUL_F:
+ case OPC_SIGN_F:
+ case OPC_CMPS_F:
+ case OPC_ABSNEG_F:
+ case OPC_CMPV_F:
+ case OPC_FLOOR_F:
+ case OPC_CEIL_F:
+ case OPC_RNDNE_F:
+ case OPC_RNDAZ_F:
+ case OPC_TRUNC_F:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static inline bool is_cat3_float(opc_t opc)
+{
+ switch (opc) {
+ case OPC_MAD_F16:
+ case OPC_MAD_F32:
+ case OPC_SEL_F16:
+ case OPC_SEL_F32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
+
+#endif /* INSTR_A3XX_H_ */