summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Clark <robdclark@chromium.org>2020-07-24 09:30:04 -0700
committerMarge Bot <eric+marge@anholt.net>2020-07-28 09:45:08 +0000
commit536f43cb96be91c95f6b4a88dfc8c2ba33dbda4d (patch)
tree3e6355d01d29c87e6a98e87bf774e8ab8f20672a
parent1ea4ef0d3be829e392922f5d26fbc89bf69a8a67 (diff)
freedreno: slurp in afuc
Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6070>
-rw-r--r--src/freedreno/afuc/Makefile368
-rw-r--r--src/freedreno/afuc/README.rst317
-rw-r--r--src/freedreno/afuc/afuc.h188
-rw-r--r--src/freedreno/afuc/asm.c435
-rw-r--r--src/freedreno/afuc/asm.h127
-rw-r--r--src/freedreno/afuc/disasm.c829
-rw-r--r--src/freedreno/afuc/lexer.l92
-rw-r--r--src/freedreno/afuc/meson.build69
-rw-r--r--src/freedreno/afuc/parser.y269
-rw-r--r--src/freedreno/meson.build1
10 files changed, 2695 insertions, 0 deletions
diff --git a/src/freedreno/afuc/Makefile b/src/freedreno/afuc/Makefile
new file mode 100644
index 00000000000..12e6f3aebf9
--- /dev/null
+++ b/src/freedreno/afuc/Makefile
@@ -0,0 +1,368 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.17
+# NOTE(review): as generator output this file is not meant to be maintained
+# by hand; lasting changes belong in the CMake listfiles that produced it.
+
+# Default target executed when no arguments are given to make.
+default_target: all
+
+.PHONY : default_target
+
+# Allow only one "make -f Makefile2" at a time, but pass parallelism.
+# (.NOTPARALLEL serializes the targets of this makefile only; the nested
+# $(MAKE) invocations further down still honour -j.)
+.NOTPARALLEL:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+# (An empty prerequisite list clears the known-suffix list.)
+.SUFFIXES:
+
+
+# Disable VCS-based implicit rules: a pattern rule with no recipe cancels the
+# built-in rule that has the same target and prerequisite patterns.
+% : %,v
+
+
+# Disable VCS-based implicit rules (RCS directory variant).
+% : RCS/%
+
+
+# Disable VCS-based implicit rules (RCS directory, ",v" file variant).
+% : RCS/%,v
+
+
+# Disable VCS-based implicit rules (SCCS directory variant).
+% : SCCS/s.%
+
+
+# Disable VCS-based implicit rules (SCCS "s." file variant).
+% : s.%
+
+
+# Re-add one dummy suffix. Judging by its name this works around an HP-UX make
+# that needs a non-empty suffix list -- TODO confirm.
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Command-line flag to silence nested $(MAKE).
+# The variable name is built from $(VERBOSE): with VERBOSE empty this defines
+# MAKESILENT = -s; with VERBOSE set it defines some other variable, leaving
+# MAKESILENT empty so nested makes are not silenced.
+$(VERBOSE)MAKESILENT = -s
+
+# Suppress display of executed commands.
+# Same trick: the target is only the special .SILENT when VERBOSE is empty.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+# Absolute path as found on the machine that generated this file.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+# Overrides make's built-in RM with the cmake-based equivalent.
+RM = /usr/bin/cmake -E rm -f
+
+# Escaping for special characters.
+# Holds a literal "=".
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+# NOTE(review): machine-specific absolute path to an envytools checkout in a
+# user's home directory, not to src/freedreno/afuc where this file is being
+# added. The file looks like build output committed by accident -- confirm
+# whether it belongs in the tree at all.
+CMAKE_SOURCE_DIR = /home/robclark/src/envytools
+
+# The top-level build directory on which CMake was run.
+# Same value as CMAKE_SOURCE_DIR, i.e. this was an in-source build.
+CMAKE_BINARY_DIR = /home/robclark/src/envytools
+
+#=============================================================================
+# Targets provided globally by CMake.
+# Note that the recipes below invoke /usr/bin/cmake, /usr/bin/ccmake and
+# /usr/bin/ctest by literal path rather than through a variable.
+
+# Special rule for the target install/strip
+install/strip: preinstall
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..."
+ /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake
+.PHONY : install/strip
+
+# Special rule for the target install/strip/fast
+# (same recipe as install/strip, but depends on preinstall/fast)
+install/strip/fast: preinstall/fast
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..."
+ /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake
+.PHONY : install/strip/fast
+
+# Special rule for the target install/local
+install/local: preinstall
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..."
+ /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake
+.PHONY : install/local
+
+# Special rule for the target install/local/fast
+# (same recipe as install/local, but depends on preinstall/fast)
+install/local/fast: preinstall/fast
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..."
+ /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake
+.PHONY : install/local/fast
+
+# Special rule for the target edit_cache
+edit_cache:
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake cache editor..."
+ /usr/bin/ccmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
+.PHONY : edit_cache
+
+# Special rule for the target edit_cache/fast (plain alias of edit_cache)
+edit_cache/fast: edit_cache
+
+.PHONY : edit_cache/fast
+
+# Special rule for the target test
+# Extra ctest arguments can be passed through the ARGS variable.
+test:
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running tests..."
+ /usr/bin/ctest --force-new-ctest-process $(ARGS)
+.PHONY : test
+
+# Special rule for the target test/fast (plain alias of test)
+test/fast: test
+
+.PHONY : test/fast
+
+# Special rule for the target install
+install: preinstall
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..."
+ /usr/bin/cmake -P cmake_install.cmake
+.PHONY : install
+
+# Special rule for the target install/fast
+# (same recipe as install, but depends on preinstall/fast)
+install/fast: preinstall/fast
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..."
+ /usr/bin/cmake -P cmake_install.cmake
+.PHONY : install/fast
+
+# Special rule for the target list_install_components
+# Only prints a fixed message; nothing is built or installed.
+list_install_components:
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Available install components are: \"Unspecified\""
+.PHONY : list_install_components
+
+# Special rule for the target list_install_components/fast
+# (plain alias of list_install_components)
+list_install_components/fast: list_install_components
+
+.PHONY : list_install_components/fast
+
+# Special rule for the target rebuild_cache
+rebuild_cache:
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..."
+ /usr/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
+.PHONY : rebuild_cache
+
+# Special rule for the target rebuild_cache/fast (plain alias of rebuild_cache)
+rebuild_cache/fast: rebuild_cache
+
+.PHONY : rebuild_cache/fast
+
+# The main all target
+# Checks the build system first, then delegates to the top-level
+# CMakeFiles/Makefile2 for the afuc/all goal, bracketed by progress-reporting
+# calls. Every recipe line changes into the hard-coded build directory rather
+# than using $(CMAKE_BINARY_DIR).
+all: cmake_check_build_system
+ cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -E cmake_progress_start /home/robclark/src/envytools/CMakeFiles /home/robclark/src/envytools/afuc/CMakeFiles/progress.marks
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/all
+ $(CMAKE_COMMAND) -E cmake_progress_start /home/robclark/src/envytools/CMakeFiles 0
+.PHONY : all
+
+# The main clean target
+# Delegates to CMakeFiles/Makefile2 for the afuc/clean goal.
+clean:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/clean
+.PHONY : clean
+
+# The clean/fast target (plain alias of clean)
+clean/fast: clean
+
+.PHONY : clean/fast
+
+# Prepare targets for installation.
+# Builds "all" first, then runs the afuc/preinstall goal of Makefile2.
+preinstall: all
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/preinstall
+.PHONY : preinstall
+
+# Prepare targets for installation.
+# Fast variant: same recipe as preinstall but without the dependency on "all".
+preinstall/fast:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/preinstall
+.PHONY : preinstall/fast
+
+# clear depends
+# Same cmake invocation as cmake_check_build_system, except that the trailing
+# argument is 1 instead of 0.
+depend:
+ cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
+.PHONY : depend
+
+# Convenience name for target.
+# Full rule name for the asm target; delegates to CMakeFiles/Makefile2.
+afuc/CMakeFiles/asm.dir/rule:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/CMakeFiles/asm.dir/rule
+.PHONY : afuc/CMakeFiles/asm.dir/rule
+
+# Convenience name for target.
+# Short alias: "make asm".
+asm: afuc/CMakeFiles/asm.dir/rule
+
+.PHONY : asm
+
+# fast build rule for target.
+# Invokes asm.dir/build.make directly instead of going through Makefile2.
+asm/fast:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/build
+.PHONY : asm/fast
+
+# Convenience name for target.
+# Full rule name for the disasm target; delegates to CMakeFiles/Makefile2.
+afuc/CMakeFiles/disasm.dir/rule:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/CMakeFiles/disasm.dir/rule
+.PHONY : afuc/CMakeFiles/disasm.dir/rule
+
+# Convenience name for target.
+# Short alias: "make disasm".
+disasm: afuc/CMakeFiles/disasm.dir/rule
+
+.PHONY : disasm
+
+# fast build rule for target.
+# Invokes disasm.dir/build.make directly instead of going through Makefile2.
+disasm/fast:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/build
+.PHONY : disasm/fast
+
+# Short alias for asm.c.o.
+asm.o: asm.c.o
+
+.PHONY : asm.o
+
+# target to build an object file
+asm.c.o:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.o
+.PHONY : asm.c.o
+
+# Short alias for asm.c.i.
+asm.i: asm.c.i
+
+.PHONY : asm.i
+
+# target to preprocess a source file
+asm.c.i:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.i
+.PHONY : asm.c.i
+
+# Short alias for asm.c.s.
+asm.s: asm.c.s
+
+.PHONY : asm.s
+
+# target to generate assembly for a file
+asm.c.s:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.s
+.PHONY : asm.c.s
+
+# Short alias for disasm.c.o.
+disasm.o: disasm.c.o
+
+.PHONY : disasm.o
+
+# target to build an object file
+disasm.c.o:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.o
+.PHONY : disasm.c.o
+
+# Short alias for disasm.c.i.
+disasm.i: disasm.c.i
+
+.PHONY : disasm.i
+
+# target to preprocess a source file
+disasm.c.i:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.i
+.PHONY : disasm.c.i
+
+# Short alias for disasm.c.s.
+disasm.s: disasm.c.s
+
+.PHONY : disasm.s
+
+# target to generate assembly for a file
+disasm.c.s:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.s
+.PHONY : disasm.c.s
+
+# Short alias for lexer.c.o.
+lexer.o: lexer.c.o
+
+.PHONY : lexer.o
+
+# target to build an object file
+# (lexer objects are built through the asm target's build.make)
+lexer.c.o:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.o
+.PHONY : lexer.c.o
+
+# Short alias for lexer.c.i.
+lexer.i: lexer.c.i
+
+.PHONY : lexer.i
+
+# target to preprocess a source file
+lexer.c.i:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.i
+.PHONY : lexer.c.i
+
+# Short alias for lexer.c.s.
+lexer.s: lexer.c.s
+
+.PHONY : lexer.s
+
+# target to generate assembly for a file
+lexer.c.s:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.s
+.PHONY : lexer.c.s
+
+# Short alias for parser.c.o.
+parser.o: parser.c.o
+
+.PHONY : parser.o
+
+# target to build an object file
+# (parser objects are built through the asm target's build.make)
+parser.c.o:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.o
+.PHONY : parser.c.o
+
+# Short alias for parser.c.i.
+parser.i: parser.c.i
+
+.PHONY : parser.i
+
+# target to preprocess a source file
+parser.c.i:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.i
+.PHONY : parser.c.i
+
+# Short alias for parser.c.s.
+parser.s: parser.c.s
+
+.PHONY : parser.s
+
+# target to generate assembly for a file
+parser.c.s:
+ cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.s
+.PHONY : parser.c.s
+
+# Help Target
+# Prints a fixed list of target names. The list is partial: the */fast
+# variants, preinstall and the *.c.o / *.c.i / *.c.s targets defined in this
+# file are not mentioned.
+help:
+ @echo "The following are some of the valid targets for this Makefile:"
+ @echo "... all (the default if no target is provided)"
+ @echo "... clean"
+ @echo "... depend"
+ @echo "... edit_cache"
+ @echo "... install"
+ @echo "... install/local"
+ @echo "... install/strip"
+ @echo "... list_install_components"
+ @echo "... rebuild_cache"
+ @echo "... test"
+ @echo "... asm"
+ @echo "... disasm"
+ @echo "... asm.o"
+ @echo "... asm.i"
+ @echo "... asm.s"
+ @echo "... disasm.o"
+ @echo "... disasm.i"
+ @echo "... disasm.s"
+ @echo "... lexer.o"
+ @echo "... lexer.i"
+ @echo "... lexer.s"
+ @echo "... parser.o"
+ @echo "... parser.i"
+ @echo "... parser.s"
+.PHONY : help
+
+
+
+#=============================================================================
+# Special targets to cleanup operation of make.
+
+# Special rule to run CMake to check the build system integrity.
+# No rule that depends on this can have commands that come from listfiles
+# because they might be regenerated.
+# In this file only the "all" target lists it as a prerequisite. The recipe
+# runs from the hard-coded build directory.
+cmake_check_build_system:
+ cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
+.PHONY : cmake_check_build_system
+
diff --git a/src/freedreno/afuc/README.rst b/src/freedreno/afuc/README.rst
new file mode 100644
index 00000000000..e06c9397d60
--- /dev/null
+++ b/src/freedreno/afuc/README.rst
@@ -0,0 +1,317 @@
+=====================
+Adreno Five Microcode
+=====================
+
+.. contents::
+
+.. _afuc-introduction:
+
+Introduction
+============
+
+Adreno GPUs prior to 6xx use two micro-controllers to parse the command-stream,
+setup the hardware for draws (or compute jobs), and do various GPU
+housekeeping. They are relatively simple (basically glorified
+register writers) and basically all their state is in a collection
+of registers. Ie. there is no stack, and no memory assigned to
+them; any global state like which bank of context registers is to
+be used in the next draw is stored in a register.
+
+The setup is similar to radeon, in fact Adreno 2xx thru 4xx used
+basically the same instruction set as r600. There is a "PFP"
+(Prefetch Parser) and "ME" (Micro Engine, also confusingly referred
+to as "PM4"). These make up the "CP" ("Command Parser"). The
+PFP runs ahead of the ME, with some PM4 packets handled entirely
+in the PFP. Between the PFP and ME is a FIFO ("MEQ"). In the
+generations prior to Adreno 5xx, the PFP and ME had different
+instruction sets.
+
+Starting with Adreno 5xx, a new microcontroller with a unified
+instruction set was introduced, although the overall architecture
+and purpose of the two microcontrollers remains the same.
+
+For lack of a better name, this new instruction set is called
+"Adreno Five MicroCode" or "afuc". (No idea what Qualcomm calls
+it internally.)
+
+With Adreno 6xx, the separate PFP and ME are replaced with a single
+SQE microcontroller using the same instruction set as 5xx.
+
+.. _afuc-overview:
+
+Instruction Set Overview
+========================
+
+32bit instruction set with basic arithmetic ops that can take
+either two source registers or one src and a 16b immediate.
+
+32 registers, although some are special purpose:
+
+- ``$00`` - always reads zero, otherwise seems to be the PC
+- ``$01`` - current PM4 packet header
+- ``$1c`` - alias ``$rem``, remaining data in packet
+- ``$1d`` - alias ``$addr``
+- ``$1f`` - alias ``$data``
+
+Branch instructions have a delay slot so the following instruction
+is always executed regardless of whether branch is taken or not.
+
+
+.. _afuc-alu:
+
+ALU Instructions
+================
+
+The following instructions are available:
+
+- ``add`` - add
+- ``addhi`` - add + carry (for upper 32b of 64b value)
+- ``sub`` - subtract
+- ``subhi`` - subtract + carry (for upper 32b of 64b value)
+- ``and`` - bitwise AND
+- ``or`` - bitwise OR
+- ``xor`` - bitwise XOR
+- ``not`` - bitwise NOT (no src1)
+- ``shl`` - shift-left
+- ``ushr`` - unsigned shift-right
+- ``ishr`` - signed shift-right
+- ``rot`` - rotate-left (like shift-left with wrap-around)
+- ``mul8`` - multiply low 8b of two src
+- ``min`` - minimum
+- ``max`` - maximum
+- ``cmp`` - compare two values
+
+The ALU instructions can take either two src registers, or a src
+plus 16b immediate as 2nd src, ex::
+
+ add $dst, $src, 0x1234 ; src2 is immed
+ add $dst, $src1, $src2 ; src2 is reg
+
+The ``not`` instruction only takes a single source::
+
+ not $dst, $src
+ not $dst, 0x1234
+
+.. _afuc-alu-cmp:
+
+The ``cmp`` instruction returns:
+
+- ``0x00`` if src1 > src2
+- ``0x2b`` if src1 == src2
+- ``0x1e`` if src1 < src2
+
+See explanation in :ref:`afuc-branch`
+
+
+.. _afuc-branch:
+
+Branch Instructions
+===================
+
+The following branch/jump instructions are available:
+
+- ``brne`` - branch if not equal (or bit not set)
+- ``breq`` - branch if equal (or bit set)
+- ``jump`` - unconditional jump
+
+Both ``brne`` and ``breq`` have two forms, comparing the src register
+against either a small immediate (up to 5 bits) or a specific bit::
+
+ breq $src, b3, #somelabel ; branch if src & (1 << 3)
+ breq $src, 0x3, #somelabel ; branch if src == 3
+
+The branch instructions are encoded with a 16b relative offset.
+Since ``$00`` always reads back zero, it can be used to construct
+an unconditional relative jump.
+
+The :ref:`cmp <afuc-alu-cmp>` instruction can be paired with the
+bit-test variants of ``brne``/``breq`` to implement gt/ge/lt/le,
+due to the bit pattern it returns, for example::
+
+ cmp $04, $02, $03
+ breq $04, b1, #somelabel
+
+will branch if ``$02`` is less than or equal to ``$03``.
+
+
+.. _afuc-call:
+
+Call/Return
+===========
+
+Simple subroutines can be implemented with ``call``/``ret``. The
+jump instruction encodes a fixed offset.
+
+ TODO not sure how many levels deep function calls can be nested.
+ There isn't really a stack. Definitely seems to be multiple
+ levels of fxn call, see in PFP: CP_CONTEXT_SWITCH_YIELD -> f13 ->
+ f22.
+
+
+.. _afuc-control:
+
+Config Instructions
+===================
+
+These seem to read/write config state in other parts of CP. In at
+least some cases I expect these map to CP registers (but possibly
+not directly??)
+
+- ``cread $dst, [$off + addr], flags``
+- ``cwrite $src, [$off + addr], flags``
+
+In cases where no offset is needed, ``$00`` is frequently used as
+the offset.
+
+For example, the following sequence updates ``CP_IBn_BASE_LO``/``HI``/``BUFSIZE``::
+
+ ; load CP_INDIRECT_BUFFER parameters from cmdstream:
+ mov $02, $data ; low 32b of IB target address
+ mov $03, $data ; high 32b of IB target
+ mov $04, $data ; IB size in dwords
+
+ ; sanity check # of dwords:
+ breq $04, 0x0, #l23 (#69, 04a2)
+
+ ; this seems something to do with figuring out whether
+ ; we are going from RB->IB1 or IB1->IB2 (ie. so the
+ ; below cwrite instructions update either
+ ; CP_IB1_BASE_LO/HI/BUFSIZE or CP_IB2_BASE_LO/HI/BUFSIZE
+ and $05, $18, 0x0003
+ shl $05, $05, 0x0002
+
+ ; update CP_IBn_BASE_LO/HI/BUFSIZE:
+ cwrite $02, [$05 + 0x0b0], 0x8
+ cwrite $03, [$05 + 0x0b1], 0x8
+ cwrite $04, [$05 + 0x0b2], 0x8
+
+
+
+.. _afuc-reg-access:
+
+Register Access
+===============
+
+The special registers ``$addr`` and ``$data`` can be used to write GPU
+registers, for example, to write::
+
+ mov $addr, CP_SCRATCH_REG[0x2] ; set register to write
+ mov $data, $03 ; CP_SCRATCH_REG[0x2]
+ mov $data, $04 ; CP_SCRATCH_REG[0x3]
+ ...
+
+Subsequent writes to ``$data`` will increment the address of the register
+to write, so a sequence of consecutive registers can be written.
+
+To read::
+
+ mov $addr, CP_SCRATCH_REG[0x2]
+ mov $03, $addr
+ mov $04, $addr
+
+Many registers that are updated frequently have two banks, so they can be
+updated without stalling for previous draw to finish. These banks are
+arranged so bit 11 is zero for bank 0 and 1 for bank 1. The ME fw (at
+least the version I'm looking at) stores this in ``$17``, so to update
+these registers from ME::
+
+ or $addr, $17, VFD_INDEX_OFFSET
+ mov $data, $03
+ ...
+
+Note that PFP doesn't seem to use this approach, instead it does something
+like::
+
+ mov $0c, CP_SCRATCH_REG[0x7]
+ mov $02, 0x789a ; value
+ cwrite $0c, [$00 + 0x010], 0x8
+ cwrite $02, [$00 + 0x011], 0x8
+
+Like with the ``$addr``/``$data`` approach, the destination register address
+increments on each write.
+
+.. _afuc-mem:
+
+Memory Access
+=============
+
+There are no load/store instructions, as such. The microcontrollers
+have only indirect memory access via GPU registers. There are two
+possible mechanisms.
+
+Read/Write via CP_NRT Registers
+-------------------------------
+
+This seems to be only used by ME. If PFP were also using it, they would
+race with each other. It seems to be primarily used for small reads.
+
+- ``CP_ME_NRT_ADDR_LO``/``_HI`` - write to set the address to read or write
+- ``CP_ME_NRT_DATA`` - write to trigger write to address in ``CP_ME_NRT_ADDR``
+
+The address register increments with successive reads or writes.
+
+Memory Write example::
+
+ ; store 64b value in $04+$05 to 64b address in $02+$03
+ mov $addr, CP_ME_NRT_ADDR_LO
+ mov $data, $02
+ mov $data, $03
+ mov $addr, CP_ME_NRT_DATA
+ mov $data, $04
+ mov $data, $05
+
+Memory Read example::
+
+ ; load 64b value from address in $02+$03 into $04+$05
+ mov $addr, CP_ME_NRT_ADDR_LO
+ mov $data, $02
+ mov $data, $03
+ mov $04, $addr
+ mov $05, $addr
+
+
+Read via Control Instructions
+-----------------------------
+
+This is used by PFP whenever it needs to read memory. Also seems to be
+used by ME for streaming reads (larger amounts of data). The DMA access
+seems to be done by ROQ.
+
+ TODO might also be possible for write access
+
+ TODO some of the control commands might be synchronizing access
+ between PFP and ME??
+
+An example from ``CP_DRAW_INDIRECT`` packet handler::
+
+ mov $07, 0x0004 ; # of dwords to read from draw-indirect buffer
+ ; load address of indirect buffer from cmdstream:
+ cwrite $data, [$00 + 0x0b8], 0x8
+ cwrite $data, [$00 + 0x0b9], 0x8
+ ; set # of dwords to read:
+ cwrite $07, [$00 + 0x0ba], 0x8
+ ...
+ ; read parameters from draw-indirect buffer:
+ mov $09, $addr
+ mov $07, $addr
+ cread $12, [$00 + 0x040], 0x8
+ ; the start parameter gets written into MEQ, which ME writes
+ ; to VFD_INDEX_OFFSET register:
+ mov $data, $addr
+
+
+A6XX NOTES
+==========
+
+The ``$14`` register holds global flags set by:
+
+ CP_SKIP_IB2_ENABLE_LOCAL - b8
+ CP_SKIP_IB2_ENABLE_GLOBAL - b9
+ CP_SET_MARKER
+ MODE=GMEM - sets b15
+ MODE=BLIT2D - clears b15, b12, b7
+ CP_SET_MODE - b29+b30
+ CP_SET_VISIBILITY_OVERRIDE - b11, b21, b30?
+ CP_SET_DRAW_STATE - checks b29+b30
+
+ CP_COND_REG_EXEC - checks b10, which should be predicate flag?
diff --git a/src/freedreno/afuc/afuc.h b/src/freedreno/afuc/afuc.h
new file mode 100644
index 00000000000..4f9e9d21815
--- /dev/null
+++ b/src/freedreno/afuc/afuc.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _AFUC_H_
+#define _AFUC_H_
+
+/*
+TODO kernel debugfs to inject packet into rb for easier experimentation. It
+should trigger reloading pfp/me and resetting gpu..
+
+Actually maybe it should be flag on submit ioctl to be able to deal w/ relocs,
+should be restricted to CAP_ADMIN and probably compile option too (default=n).
+if flag set, copy cmdstream bo contents into RB instead of IB'ing to it from
+RB.
+ */
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define PACKED __attribute__((__packed__))
+
+/* The opcode is encoded variable length in a 6-bit field.  When that
+ * field is less than 0x30 it holds a 5-bit opcode followed by the
+ * (rep) flag in the lowest bit.  When it is >= 0x30 (ie. its top two
+ * bits are '11') all 6 bits are the opcode and there is no (rep)
+ * flag.  See afuc_get_opc() / afuc_set_opc().
+ *
+ * Several enumerators below deliberately share a numeric value (for
+ * example OPC_MSB and OPC_STORE6, or the ...5 and ...6 variants); the
+ * ...6 names are the A6xx assignments.
+ */
+typedef enum {
+ OPC_NOP = 0x00,
+
+ OPC_ADD = 0x01, /* add immediate */
+ OPC_ADDHI = 0x02, /* add immediate (hi 32b of 64b) */
+ OPC_SUB = 0x03, /* subtract immediate */
+ OPC_SUBHI = 0x04, /* subtract immediate (hi 32b of 64b) */
+ OPC_AND = 0x05, /* AND immediate */
+ OPC_OR = 0x06, /* OR immediate */
+ OPC_XOR = 0x07, /* XOR immediate */
+ OPC_NOT = 0x08, /* bitwise not of immed (src1 ignored) */
+ OPC_SHL = 0x09, /* shift-left immediate */
+ OPC_USHR = 0x0a, /* unsigned shift right by immediate */
+ OPC_ISHR = 0x0b, /* signed shift right by immediate */
+ OPC_ROT = 0x0c, /* rotate left (left shift with wrap-around) */
+ OPC_MUL8 = 0x0d, /* 8bit multiply by immediate */
+ OPC_MIN = 0x0e, /* minimum */
+ OPC_MAX = 0x0f, /* maximum */
+ OPC_CMP = 0x10, /* compare src to immed */
+ OPC_MOVI = 0x11, /* move immediate */
+
+ /* Return the most-significant bit of src2, or 0 if src2 == 0 (the
+ * same as if src2 == 1). src1 is ignored. Note that this overlaps
+ * with STORE6, so it can only be used with the two-source encoding.
+ */
+ OPC_MSB = 0x14,
+
+
+ OPC_ALU = 0x13, /* ALU instruction with two src registers */
+
+ /* These seem something to do with setting some external state..
+ * doesn't seem to map *directly* to registers, but I guess that
+ * is where things end up. For example, this sequence in the
+ * CP_INDIRECT_BUFFER handler:
+ *
+ * mov $02, $data ; low 32b of IB target address
+ * mov $03, $data ; high 32b of IB target
+ * mov $04, $data ; IB size in dwords
+ * breq $04, 0x0, #l23 (#69, 04a2)
+ * and $05, $18, 0x0003
+ * shl $05, $05, 0x0002
+ * cwrite $02, [$05 + 0x0b0], 0x8
+ * cwrite $03, [$05 + 0x0b1], 0x8
+ * cwrite $04, [$05 + 0x0b2], 0x8
+ *
+ * Note that CP_IB1/2_BASE_LO/HI/BUFSZ are in 0x0b1f->0x0b21 (IB1) and
+ * 0x0b22->0x0b24 (IB2). Presumably $05 ends up w/ different value
+ * for RB->IB1 vs IB1->IB2.
+ */
+ OPC_CWRITE5 = 0x15,
+ OPC_CREAD5 = 0x16,
+
+ /* A6xx shuffled around the cwrite/cread opcodes and added new opcodes
+ * that let you read/write directly to memory (and bypass the IOMMU?).
+ */
+ OPC_STORE6 = 0x14,
+ OPC_CWRITE6 = 0x15,
+ OPC_LOAD6 = 0x16,
+ OPC_CREAD6 = 0x17,
+
+ /* Everything from here on is >= 0x30, ie. uses the 6-bit encoding
+ * without a (rep) flag.
+ */
+ OPC_BRNEI = 0x30, /* relative branch (if $src != immed) */
+ OPC_BREQI = 0x31, /* relative branch (if $src == immed) */
+ OPC_BRNEB = 0x32, /* relative branch (if bit not set) */
+ OPC_BREQB = 0x33, /* relative branch (if bit is set) */
+ OPC_RET = 0x34, /* return */
+ OPC_CALL = 0x35, /* "function" call */
+ OPC_WIN = 0x36, /* wait for input (ie. wait for WPTR to advance) */
+ OPC_PREEMPTLEAVE6 = 0x38, /* try to leave preemption */
+} afuc_opc;
+
+
+/* One 32-bit afuc instruction word, viewed through per-format bitfield
+ * layouts.  Every named variant declares 26 bits of operands followed
+ * by a 6-bit hdr, so hdr lines up with opc_r in the anonymous variant
+ * at the end; that is the field afuc_get_opc()/afuc_set_opc() work on.
+ */
+typedef union PACKED {
+ /* addi, subi, andi, ori, xori, etc: */
+ struct PACKED {
+ uint32_t uimm : 16;
+ uint32_t dst : 5;
+ uint32_t src : 5;
+ uint32_t hdr : 6;
+ } alui;
+ /* immediate plus shift amount; presumably the OPC_MOVI ("move
+ * immediate") format -- TODO confirm against asm.c/disasm.c
+ */
+ struct PACKED {
+ uint32_t uimm : 16;
+ uint32_t dst : 5;
+ uint32_t shift : 5;
+ uint32_t hdr : 6;
+ } movi;
+ /* two source registers; presumably used with OPC_ALU, with the
+ * 'alu' field selecting the operation -- TODO confirm
+ */
+ struct PACKED {
+ uint32_t alu : 5;
+ uint32_t pad : 6;
+ uint32_t dst : 5;
+ uint32_t src2 : 5;
+ uint32_t src1 : 5;
+ uint32_t hdr : 6;
+ } alu;
+ /* cread/cwrite: */
+ struct PACKED {
+ uint32_t uimm : 12;
+ uint32_t flags : 4;
+ uint32_t src1 : 5; /* dst (cread) or src (cwrite) register */
+ uint32_t src2 : 5; /* read or write address is src2+uimm */
+ uint32_t hdr : 6;
+ } control;
+ /* relative branches; bit_or_imm is either the bit number to test
+ * or the small immediate to compare src against
+ */
+ struct PACKED {
+ int32_t ioff : 16; /* relative offset */
+ uint32_t bit_or_imm : 5;
+ uint32_t src : 5;
+ uint32_t hdr : 6;
+ } br;
+ struct PACKED {
+ uint32_t uoff : 26; /* absolute (unsigned) offset */
+ uint32_t hdr : 6;
+ } call;
+ /* no operands, only the header */
+ struct PACKED {
+ uint32_t pad : 26;
+ uint32_t hdr : 6;
+ } waitin;
+ /* format-independent view: raw 6-bit opcode field (opcode plus,
+ * for values below 0x30, the rep bit)
+ */
+ struct PACKED {
+ uint32_t pad : 26;
+ uint32_t opc_r : 6;
+ };
+
+} afuc_instr;
+
+/* Decode the raw 6-bit opc_r field of *ai.
+ *
+ * Values below 0x30 hold a 5-bit opcode in the upper bits and the
+ * (rep) flag in bit 0.  Values at or above 0x30 are the opcode as-is
+ * and never carry (rep).  Results are returned through *opc and *rep.
+ */
+static inline void
+afuc_get_opc(afuc_instr *ai, afuc_opc *opc, bool *rep)
+{
+	const bool has_rep_bit = ai->opc_r < 0x30;
+
+	*opc = has_rep_bit ? (ai->opc_r >> 1) : ai->opc_r;
+	*rep = has_rep_bit ? (ai->opc_r & 0x1) : false;
+}
+
+/* Encode opc (and, where the encoding has room for it, rep) into the
+ * raw 6-bit opc_r field of *ai.  This is the inverse of afuc_get_opc():
+ * opcodes at or above 0x30 are stored verbatim and rep is dropped,
+ * anything smaller is shifted up one bit with rep placed in bit 0.
+ */
+static inline void
+afuc_set_opc(afuc_instr *ai, afuc_opc opc, bool rep)
+{
+	if (opc >= 0x30) {
+		ai->opc_r = opc;
+		return;
+	}
+
+	ai->opc_r = (opc << 1) | (rep ? 1 : 0);
+}
+
+#endif /* _AFUC_H_ */
diff --git a/src/freedreno/afuc/asm.c b/src/freedreno/afuc/asm.c
new file mode 100644
index 00000000000..321d06adfef
--- /dev/null
+++ b/src/freedreno/afuc/asm.c
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <getopt.h>
+
+#include "afuc.h"
+#include "rnn.h"
+#include "rnndec.h"
+#include "parser.h"
+#include "asm.h"
+
+int gpuver; /* GPU generation (5 or 6); set from -g or inferred from the input filename in main() */
+
+
+static struct rnndeccontext *ctx;
+static struct rnndb *db;
+static struct rnndomain *control_regs; /* domain searched by parse_control_reg() */
+struct rnndomain *dom[2];
+
+
+/* tables filled while parsing (next_instr()/decl_label()) and consumed when
+ * emitting.. bit lame to hard-code max but fw sizes are small */
+static struct asm_instruction instructions[0x2000];
+static unsigned num_instructions;
+
+static struct asm_label labels[0x512]; /* NOTE(review): 0x512 is 1298 entries; possibly 512 or 0x200 was meant - confirm */
+static unsigned num_labels;
+
+/**
+ * Reserve the next slot of the instruction table for the parser and tag it
+ * with the token of the mnemonic being parsed.
+ *
+ * Exits with status 2 if the (fixed size) table is full.
+ */
+struct asm_instruction *next_instr(int tok)
+{
+	struct asm_instruction *ai;
+
+	/* check *before* handing out the slot, and don't rely on assert()
+	 * which is compiled out with NDEBUG:
+	 */
+	if (num_instructions >= ARRAY_SIZE(instructions)) {
+		fprintf(stderr, "too many instructions (max %u)\n",
+				(unsigned)ARRAY_SIZE(instructions));
+		exit(2);
+	}
+
+	ai = &instructions[num_instructions++];
+	ai->tok = tok;
+	return ai;
+}
+
+/**
+ * Record a label declaration.  The label refers to the instruction that
+ * will be parsed next, ie. the current instruction count.  The string is
+ * stored by pointer, not copied.
+ *
+ * Exits with status 2 if the (fixed size) label table is full.
+ */
+void decl_label(const char *str)
+{
+	struct asm_label *label;
+
+	/* check *before* using the slot, and don't rely on assert() which
+	 * is compiled out with NDEBUG:
+	 */
+	if (num_labels >= ARRAY_SIZE(labels)) {
+		fprintf(stderr, "too many labels (max %u)\n",
+				(unsigned)ARRAY_SIZE(labels));
+		exit(2);
+	}
+
+	label = &labels[num_labels++];
+	label->offset = num_instructions;
+	label->label = str;
+}
+
+/**
+ * Return the instruction offset at which the named label was declared.
+ * Prints a diagnostic and exits with status 2 if there is no such label.
+ */
+static int resolve_label(const char *str)
+{
+	const struct asm_label *cur = labels;
+	const struct asm_label *const end = labels + num_labels;
+
+	for (; cur != end; cur++) {
+		if (strcmp(str, cur->label) == 0)
+			return cur->offset;
+	}
+
+	fprintf(stderr, "Undeclared label: %s\n", str);
+	exit(2);
+}
+
+/**
+ * Map the parser token of an ALU mnemonic to the matching ALU opcode.
+ * Any other token trips an assert (and yields -1 if asserts are disabled).
+ */
+static afuc_opc tok2alu(int tok)
+{
+	afuc_opc alu_opc;
+
+	switch (tok) {
+	case T_OP_ADD:   alu_opc = OPC_ADD;   break;
+	case T_OP_ADDHI: alu_opc = OPC_ADDHI; break;
+	case T_OP_SUB:   alu_opc = OPC_SUB;   break;
+	case T_OP_SUBHI: alu_opc = OPC_SUBHI; break;
+	case T_OP_AND:   alu_opc = OPC_AND;   break;
+	case T_OP_OR:    alu_opc = OPC_OR;    break;
+	case T_OP_XOR:   alu_opc = OPC_XOR;   break;
+	case T_OP_NOT:   alu_opc = OPC_NOT;   break;
+	case T_OP_SHL:   alu_opc = OPC_SHL;   break;
+	case T_OP_USHR:  alu_opc = OPC_USHR;  break;
+	case T_OP_ISHR:  alu_opc = OPC_ISHR;  break;
+	case T_OP_ROT:   alu_opc = OPC_ROT;   break;
+	case T_OP_MUL8:  alu_opc = OPC_MUL8;  break;
+	case T_OP_MIN:   alu_opc = OPC_MIN;   break;
+	case T_OP_MAX:   alu_opc = OPC_MAX;   break;
+	case T_OP_CMP:   alu_opc = OPC_CMP;   break;
+	case T_OP_MSB:   alu_opc = OPC_MSB;   break;
+	default:
+		assert(0);
+		alu_opc = -1;
+		break;
+	}
+
+	return alu_opc;
+}
+
+static void emit_instructions(int outfd) /* encode every parsed instruction and write the 32-bit words to outfd */
+{
+ int i;
+
+ /* there is an extra 0x00000000 which kernel strips off.. we could
+ * perhaps use it for versioning.
+ */
+ i = 0;
+ write(outfd, &i, 4); /* NOTE(review): the result of write() is ignored here and below */
+
+ for (i = 0; i < num_instructions; i++) {
+ struct asm_instruction *ai = &instructions[i];
+ afuc_instr instr = {0};
+ afuc_opc opc; /* NOTE(review): stays unset if default: is reached with assert() compiled out */
+
+ /* special case, 2nd dword is patched up w/ # of instructions
+ * (ie. offset of jmptbl)
+ */
+ if (i == 1) {
+ assert(ai->is_literal);
+ ai->literal &= ~0xffff;
+ ai->literal |= num_instructions;
+ }
+
+ if (ai->is_literal) {
+ write(outfd, &ai->literal, 4); /* literals are emitted verbatim, no opcode packing */
+ continue;
+ }
+
+ switch (ai->tok) {
+ case T_OP_NOP:
+ opc = OPC_NOP;
+ if (gpuver >= 6)
+ instr.pad = 0x1000000;
+ break;
+ case T_OP_ADD:
+ case T_OP_ADDHI:
+ case T_OP_SUB:
+ case T_OP_SUBHI:
+ case T_OP_AND:
+ case T_OP_OR:
+ case T_OP_XOR:
+ case T_OP_NOT:
+ case T_OP_SHL:
+ case T_OP_USHR:
+ case T_OP_ISHR:
+ case T_OP_ROT:
+ case T_OP_MUL8:
+ case T_OP_MIN:
+ case T_OP_MAX:
+ case T_OP_CMP:
+ case T_OP_MSB:
+ if (ai->has_immed) {
+ /* MSB overlaps with STORE */
+ assert(ai->tok != T_OP_MSB);
+ opc = tok2alu(ai->tok);
+ instr.alui.dst = ai->dst;
+ instr.alui.src = ai->src1;
+ instr.alui.uimm = ai->immed;
+ } else {
+ opc = OPC_ALU;
+ instr.alu.dst = ai->dst;
+ instr.alu.src1 = ai->src1;
+ instr.alu.src2 = ai->src2;
+ instr.alu.alu = tok2alu(ai->tok);
+ }
+ break;
+ case T_OP_MOV:
+ /* move can either be encoded as movi (ie. move w/ immed) or
+ * an alu instruction
+ */
+ if (ai->has_immed) {
+ opc = OPC_MOVI;
+ instr.movi.dst = ai->dst;
+ instr.movi.uimm = ai->immed;
+ instr.movi.shift = ai->shift;
+ } else if (ai->label) {
+ /* mov w/ a label is just an alias for an immediate, this
+ * is useful to load the address of a constant table into
+ * a register:
+ */
+ opc = OPC_MOVI;
+ instr.movi.dst = ai->dst;
+ instr.movi.uimm = resolve_label(ai->label);
+ instr.movi.shift = ai->shift;
+ } else {
+ /* encode as: or $dst, $00, $src */
+ opc = OPC_ALU;
+ instr.alu.dst = ai->dst;
+ instr.alu.src1 = 0x00; /* $00 reads-back 0 */
+ instr.alu.src2 = ai->src1;
+ instr.alu.alu = OPC_OR;
+ }
+ break;
+ case T_OP_CWRITE:
+ case T_OP_CREAD:
+ case T_OP_STORE:
+ case T_OP_LOAD:
+ if (gpuver >= 6) {
+ if (ai->tok == T_OP_CWRITE) {
+ opc = OPC_CWRITE6;
+ } else if (ai->tok == T_OP_CREAD) {
+ opc = OPC_CREAD6;
+ } else if (ai->tok == T_OP_STORE) {
+ opc = OPC_STORE6;
+ } else if (ai->tok == T_OP_LOAD) {
+ opc = OPC_LOAD6;
+ }
+ } else {
+ if (ai->tok == T_OP_CWRITE) {
+ opc = OPC_CWRITE5;
+ } else if (ai->tok == T_OP_CREAD) {
+ opc = OPC_CREAD5;
+ } else if (ai->tok == T_OP_STORE ||
+ ai->tok == T_OP_LOAD) {
+ fprintf(stderr, "load and store do not exist on a5xx\n");
+ exit(1);
+ }
+ }
+ instr.control.src1 = ai->src1;
+ instr.control.src2 = ai->src2;
+ instr.control.flags = ai->bit; /* the parsed "bit" operand doubles as the flags field */
+ instr.control.uimm = ai->immed;
+ break;
+ case T_OP_BRNE:
+ case T_OP_BREQ:
+ if (ai->has_immed) {
+ opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEI : OPC_BREQI;
+ instr.br.bit_or_imm = ai->immed;
+ } else {
+ opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEB : OPC_BREQB;
+ instr.br.bit_or_imm = ai->bit;
+ }
+ instr.br.src = ai->src1;
+ instr.br.ioff = resolve_label(ai->label) - i; /* relative to the index of this instruction */
+ break;
+ case T_OP_RET:
+ opc = OPC_RET;
+ break;
+ case T_OP_CALL:
+ opc = OPC_CALL;
+ instr.call.uoff = resolve_label(ai->label); /* absolute instruction index */
+ break;
+ case T_OP_PREEMPTLEAVE:
+ opc = OPC_PREEMPTLEAVE6;
+ instr.call.uoff = resolve_label(ai->label);
+ break;
+ case T_OP_JUMP:
+ /* encode jump as: brne $00, b0, #label */
+ opc = OPC_BRNEB;
+ instr.br.bit_or_imm = 0;
+ instr.br.src = 0x00; /* $00 reads-back 0.. compare to 0 */
+ instr.br.ioff = resolve_label(ai->label) - i;
+ break;
+ case T_OP_WAITIN:
+ opc = OPC_WIN;
+ break;
+ default:
+ assert(0);
+ }
+
+ afuc_set_opc(&instr, opc, ai->rep);
+
+ write(outfd, &instr, 4);
+ }
+
+}
+
+/**
+ * Look up a value of the enum by name.  Only entries flagged as having a
+ * valid value are considered.  Returns the value, or -1 if not found.
+ */
+static int find_enum_val(struct rnnenum *en, const char *name)
+{
+	int idx = 0;
+
+	while (idx < en->valsnum) {
+		if (en->vals[idx]->valvalid &&
+				strcmp(name, en->vals[idx]->name) == 0)
+			return en->vals[idx]->value;
+		idx++;
+	}
+
+	return -1;
+}
+
+/**
+ * Look up a direct sub-element of the domain by name.  Returns its offset,
+ * or -1 if the domain has no sub-element of that name.
+ */
+static int find_reg(struct rnndomain *dom, const char *name)
+{
+	int idx = 0;
+
+	while (idx < dom->subelemsnum) {
+		if (strcmp(name, dom->subelems[idx]->name) == 0)
+			return dom->subelems[idx]->offset;
+		idx++;
+	}
+
+	return -1;
+}
+
+/**
+ * Resolve a control register token ("@NAME") to its offset in the control
+ * register domain.
+ *
+ * Prints a diagnostic to stderr and exits with status 2 if the name is not
+ * known, or if the control register domain was not found when the register
+ * database was loaded.
+ */
+unsigned parse_control_reg(const char *name)
+{
+	int val;
+
+	/* the domain lookup in main() is not checked, so guard here rather
+	 * than dereferencing NULL in find_reg():
+	 */
+	if (!control_regs) {
+		fprintf(stderr, "no control register definitions loaded, "
+				"cannot resolve: %s\n", name);
+		exit(2);
+	}
+
+	/* skip leading "@" */
+	val = find_reg(control_regs, name + 1);
+	if (val < 0) {
+		/* diagnostics go to stderr, same as resolve_label(): */
+		fprintf(stderr, "invalid control reg: %s\n", name);
+		exit(2);
+	}
+
+	return (unsigned)val;
+}
+
+/**
+ * Build the 0x80 entry jump table and append it to the output.
+ *
+ * A label lands in the table if its name matches a value of the
+ * "adreno_pm4_type3_packets" enum, or has the form UNKN<decimal id>.  The
+ * entry for that packet id is set to the label's instruction offset; all
+ * other entries stay zero.
+ *
+ * Exits if a packet id does not fit the table or the table can't be written.
+ */
+static void emit_jumptable(int outfd)
+{
+	struct rnnenum *en = rnn_findenum(ctx->db, "adreno_pm4_type3_packets");
+	uint32_t jmptable[0x80] = {0};
+	unsigned i;
+
+	for (i = 0; i < num_labels; i++) {
+		struct asm_label *label = &labels[i];
+		/* the enum is missing if the register database failed to load: */
+		int id = en ? find_enum_val(en, label->label) : -1;
+
+		/* if it doesn't match a known PM4 packet-id, try to match UNKN%d: */
+		if (id < 0) {
+			if (sscanf(label->label, "UNKN%d", &id) != 1) {
+				/* if still not found, must not belong in jump-table: */
+				continue;
+			}
+		}
+
+		/* the id comes from the input file / database, don't let it
+		 * index outside of the table:
+		 */
+		if (id < 0 || id >= (int)ARRAY_SIZE(jmptable)) {
+			fprintf(stderr, "label %s: packet id %d does not fit in jump table\n",
+					label->label, id);
+			exit(2);
+		}
+
+		jmptable[id] = label->offset;
+	}
+
+	if (write(outfd, jmptable, sizeof(jmptable)) != (ssize_t)sizeof(jmptable)) {
+		fprintf(stderr, "failed to write jump table\n");
+		exit(1);
+	}
+}
+
+/* Print the command line synopsis to stderr and exit with status 2. */
+static void usage(void)
+{
+	static const char help[] =
+		"Usage:\n"
+		"\tasm [-g GPUVER] filename.asm filename.fw\n"
+		"\t\t-g - specify GPU version (5, etc)\n";
+
+	fputs(help, stderr);
+	exit(2);
+}
+
+/**
+ * Assembler entry point:  asm [-g GPUVER] filename.asm filename.fw
+ *
+ * Parses the input with the generated parser, then writes the encoded
+ * instructions followed by the jump table to the output file.
+ */
+int main(int argc, char **argv)
+{
+	FILE *in;
+	char *file, *outfile, *name = NULL, *control_reg_name = NULL;
+	int c, ret, outfd;
+
+	/* Argument parsing: */
+	while ((c = getopt (argc, argv, "g:")) != -1) {
+		switch (c) {
+		case 'g':
+			gpuver = atoi(optarg);
+			break;
+		default:
+			usage();
+		}
+	}
+
+	/* both the input and the output filename are required, otherwise
+	 * argv[optind + 1] below would be NULL or out of bounds:
+	 */
+	if (argc - optind < 2) {
+		fprintf(stderr, "no file specified!\n");
+		usage();
+	}
+
+	file = argv[optind];
+	outfile = argv[optind + 1];
+
+	/* if gpu version not specified, infer from filename: */
+	if (!gpuver) {
+		if (strstr(file, "a5")) {
+			gpuver = 5;
+		} else if (strstr(file, "a6")) {
+			gpuver = 6;
+		}
+	}
+
+	switch (gpuver) {
+	case 6:
+		name = "A6XX";
+		control_reg_name = "A6XX_CONTROL_REG";
+		break;
+	case 5:
+		name = "A5XX";
+		control_reg_name = "A5XX_CONTROL_REG";
+		break;
+	default:
+		fprintf(stderr, "unknown GPU version!\n");
+		usage();
+	}
+
+	/* validate the input before creating/truncating the output, so that
+	 * a typo in the input name does not destroy an existing output file:
+	 */
+	in = fopen(file, "r");
+	if (!in) {
+		fprintf(stderr, "could not open \"%s\"\n", file);
+		usage();
+	}
+
+	outfd = open(outfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	if (outfd < 0) {
+		fprintf(stderr, "could not open \"%s\"\n", outfile);
+		usage();
+	}
+
+	yyset_in(in);
+
+	rnn_init();
+	db = rnn_newdb();
+
+	ctx = rnndec_newcontext(db);
+
+	rnn_parsefile(db, "adreno.xml");
+	dom[0] = rnn_finddomain(db, name);
+	dom[1] = rnn_finddomain(db, "AXXX");
+	control_regs = rnn_finddomain(db, control_reg_name);
+
+	ret = yyparse();
+	if (ret) {
+		fprintf(stderr, "parse failed: %d\n", ret);
+		return ret;
+	}
+
+	emit_instructions(outfd);
+	emit_jumptable(outfd);
+
+	close(outfd);
+	fclose(in);
+
+	return 0;
+}
diff --git a/src/freedreno/afuc/asm.h b/src/freedreno/afuc/asm.h
new file mode 100644
index 00000000000..03fb1508907
--- /dev/null
+++ b/src/freedreno/afuc/asm.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _ASM_H_
+#define _ASM_H_
+
+#include <stdint.h>
+#include <stdbool.h>
+#include "afuc.h"
+
+extern int gpuver;
+
+/**
+ * Intermediate representation for an instruction, before final encoding.
+ * This mostly exists because we need to resolve label offset's in a 2nd
+ * pass, but also so that parser.y doesn't really need to care so much
+ * about the different encodings for 2src regs vs 1src+immed, or mnemonics
+ */
+struct asm_instruction {
+ int tok; /* parser token of the mnemonic (T_OP_*), set by next_instr() */
+ int dst; /* destination register number */
+ int src1; /* first source register number */
+ int src2; /* second source register number */
+ int immed; /* immediate operand */
+ int shift; /* shift amount, encoded into movi */
+ int bit; /* bit number for bit-test branches; also encoded as the flags of control ops */
+ uint32_t literal; /* raw dword emitted as-is when is_literal is set */
+ const char *label; /* referenced label name, resolved to an offset when emitting */
+
+ bool has_immed : 1;
+ bool has_shift : 1;
+ bool has_bit : 1;
+ bool is_literal : 1;
+ bool rep : 1; /* passed as the (rep) flag to afuc_set_opc() */
+};
+
+struct asm_label {
+ unsigned offset; /* instruction count at the point of declaration, ie. index of the next instruction */
+ const char *label; /* label name, without the trailing ':' */
+};
+
+struct asm_instruction *next_instr(int tok);
+void decl_label(const char *str);
+
+
+/**
+ * Convert a register token to its register number.
+ *
+ * Accepts the named registers $rem, $addr, $addr2 and $data, or "$" followed
+ * by a hex number.  Prints a diagnostic and exits with status 2 for anything
+ * else, including numbers that do not fit the 5 bit register fields of the
+ * instruction encoding.
+ */
+static inline uint32_t
+parse_reg(const char *str)
+{
+	const char *digits = str + 1;   /* skip leading "$" */
+	char *retstr;
+	long int ret;
+
+	if (!strcmp(str, "$rem"))
+		return 0x1c;
+	else if (!strcmp(str, "$addr"))
+		return 0x1d;
+	else if (!strcmp(str, "$addr2"))
+		return 0x1e;
+	else if (!strcmp(str, "$data"))
+		return 0x1f;
+
+	ret = strtol(digits, &retstr, 16);
+
+	/* reject trailing junk and empty numbers, and also anything beyond
+	 * $1f, which would otherwise be silently truncated when stored in
+	 * the 5 bit dst/src fields (for ex, "$ff" would become $1f):
+	 */
+	if (*retstr != '\0' || retstr == digits || ret < 0 || ret > 0x1f) {
+		printf("invalid register: %s\n", str);
+		exit(2);
+	}
+
+	return ret;
+}
+
+/**
+ * Convert a literal token of the form "[hexdigits]" to its 32 bit value.
+ * Prints a diagnostic and exits with status 2 if the digits are not
+ * directly followed by the closing ']'.
+ */
+static inline uint32_t
+parse_literal(const char *str)
+{
+	char *retstr;
+	unsigned long ret;
+
+	/* skip the leading "[".. and use the unsigned conversion, since
+	 * strtol() saturates at LONG_MAX, which breaks literals with the
+	 * top bit set where long is 32 bits:
+	 */
+	ret = strtoul(str + 1, &retstr, 16);
+
+	if (*retstr != ']') {
+		printf("invalid literal: %s\n", str);
+		exit(2);
+	}
+
+	return ret;
+}
+
+/* Convert a bit token of the form "b<decimal>" to the bit number. */
+static inline uint32_t
+parse_bit(const char *str)
+{
+	const char *digits = str + 1;   /* skip the leading 'b' */
+	long bitnum = strtol(digits, NULL, 10);
+
+	return bitnum;
+}
+
+unsigned parse_control_reg(const char *name);
+
+/**
+ * Strip the trailing ':' off a label declaration.
+ *
+ * Returns a newly allocated copy of the string without its last character
+ * (an empty string stays empty).  Exits with status 2 if out of memory.
+ */
+static inline const char *
+parse_label_decl(const char *str)
+{
+	char *s = strdup(str);
+	size_t len;
+
+	if (!s) {
+		fprintf(stderr, "out of memory\n");
+		exit(2);
+	}
+
+	/* don't write before the start of the buffer for an empty string: */
+	len = strlen(s);
+	if (len > 0)
+		s[len - 1] = '\0';
+
+	return s;
+}
+
+void yyset_in (FILE * _in_str );
+
+
+#endif /* _ASM_H_ */
diff --git a/src/freedreno/afuc/disasm.c b/src/freedreno/afuc/disasm.c
new file mode 100644
index 00000000000..ea9f34cd97f
--- /dev/null
+++ b/src/freedreno/afuc/disasm.c
@@ -0,0 +1,829 @@
+/*
+ * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <getopt.h>
+
+#include "afuc.h"
+#include "rnn.h"
+#include "rnndec.h"
+
+static int gpuver; /* GPU generation (5 or 6); set from -g or inferred from the filename in main() */
+
+
+static struct rnndeccontext *ctx;
+static struct rnndb *db;
+static struct rnndomain *control_regs; /* domain used by print_control_reg() to name control registers */
+struct rnndomain *dom[2]; /* register domains tried, in order, by print_gpu_reg() */
+const char *variant; /* "A5XX" or "A6XX"; getpm4() uses it to filter enum values by variant */
+
+/* non-verbose mode should output something suitable to feed back into
+ * assembler.. verbose mode has additional output useful for debugging
+ * (like unexpected bits that are set)
+ */
+static bool verbose = false;
+
+/**
+ * If the value looks like the offset of a known GPU register, append the
+ * register name as a trailing comment.  Values below 0x100 are never
+ * annotated.  The variant specific domain takes precedence over the
+ * common one.
+ */
+static void print_gpu_reg(uint32_t regbase)
+{
+	struct rnndomain *match;
+	struct rnndecaddrinfo *info;
+
+	if (regbase < 0x100)
+		return;
+
+	if (rnndec_checkaddr(ctx, dom[0], regbase, 0))
+		match = dom[0];
+	else if (rnndec_checkaddr(ctx, dom[1], regbase, 0))
+		match = dom[1];
+	else
+		return;
+
+	if (!match)
+		return;
+
+	info = rnndec_decodeaddr(ctx, match, regbase, 0);
+	if (!info)
+		return;
+
+	printf("\t; %s", info->name);
+	free(info->name);
+	free(info);
+}
+
+/**
+ * printf() style output wrapped in a color: emits the escape string 'c',
+ * then the formatted text, then the reset string of the active color set.
+ */
+static void printc(const char *c, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+
+	printf("%s", c);
+	vprintf(fmt, ap);
+	printf("%s", ctx->colors->reset);
+
+	va_end(ap);
+}
+
+#define printerr(fmt, ...) printc(ctx->colors->err, fmt, ##__VA_ARGS__)
+#define printlbl(fmt, ...) printc(ctx->colors->btarg, fmt, ##__VA_ARGS__)
+
+/**
+ * Print a register operand, using the symbolic name for the special
+ * registers 0x1c..0x1f and "$" plus two hex digits for everything else.
+ */
+static void print_reg(unsigned reg)
+{
+	/* XXX open questions:
+	 *  - seems like *reading* $00 --> literal zero??
+	 *  - seems like read from $1c gives packet remaining len??
+	 *  - $01 current packet header, writing to $01 triggers parsing
+	 *    header and jumping to appropriate handler.
+	 */
+	switch (reg) {
+	case 0x1c:
+		printf("$rem");    /* remaining dwords in packet */
+		break;
+	case 0x1d:
+		printf("$addr");
+		break;
+	case 0x1e:
+		printf("$addr2");  /* XXX */
+		break;
+	case 0x1f:
+		printf("$data");
+		break;
+	default:
+		printf("$%02x", reg);
+		break;
+	}
+}
+
+static void print_src(unsigned reg) /* print a source operand; currently identical to print_reg() */
+{
+ print_reg(reg);
+}
+
+static void print_dst(unsigned reg) /* print a destination operand; currently identical to print_reg() */
+{
+ print_reg(reg);
+}
+
+/**
+ * Print the mnemonic (with trailing space) for an ALU opcode.  For an
+ * unknown opcode the raw instruction word is printed, highlighted as an
+ * error, followed by an "alu%02x" comment.
+ */
+static void print_alu_name(afuc_opc opc, uint32_t instr)
+{
+	/* searched in order, first match wins: */
+	static const struct {
+		afuc_opc opc;
+		const char *mnemonic;
+	} alu_names[] = {
+		{ OPC_ADD,   "add " },
+		{ OPC_ADDHI, "addhi " },
+		{ OPC_SUB,   "sub " },
+		{ OPC_SUBHI, "subhi " },
+		{ OPC_AND,   "and " },
+		{ OPC_OR,    "or " },
+		{ OPC_XOR,   "xor " },
+		{ OPC_NOT,   "not " },
+		{ OPC_SHL,   "shl " },
+		{ OPC_USHR,  "ushr " },
+		{ OPC_ISHR,  "ishr " },
+		{ OPC_ROT,   "rot " },
+		{ OPC_MUL8,  "mul8 " },
+		{ OPC_MIN,   "min " },
+		{ OPC_MAX,   "max " },
+		{ OPC_CMP,   "cmp " },
+		{ OPC_MSB,   "msb " },
+	};
+	unsigned idx;
+
+	for (idx = 0; idx < sizeof(alu_names) / sizeof(alu_names[0]); idx++) {
+		if (alu_names[idx].opc == opc) {
+			printf("%s", alu_names[idx].mnemonic);
+			return;
+		}
+	}
+
+	printerr("[%08x]", instr);
+	printf(" ; alu%02x ", opc);
+}
+
+/**
+ * Return the name of the PM4 type3 packet with the given id, or NULL if
+ * there is none.  Enum values restricted to certain variants are skipped
+ * unless their variant string contains the current variant.
+ */
+static char *getpm4(uint32_t id)
+{
+	struct rnnenum *en = rnn_findenum(ctx->db, "adreno_pm4_type3_packets");
+	int idx;
+
+	if (!en)
+		return NULL;
+
+	for (idx = 0; idx < en->valsnum; idx++) {
+		const char *variants;
+
+		if (!en->vals[idx]->valvalid || en->vals[idx]->value != id)
+			continue;
+
+		variants = en->vals[idx]->varinfo.variantsstr;
+		if (variants && !strstr(variants, variant))
+			continue;
+
+		return en->vals[idx]->name;
+	}
+
+	return NULL;
+}
+
+/**
+ * Compute the odd parity bit of a 32 bit value: 1 if the value has an even
+ * number of bits set, 0 if it has an odd number.
+ */
+static inline unsigned
+_odd_parity_bit(unsigned val)
+{
+	/* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
+	 * Fold the value down to a nibble, then look the result up in a
+	 * 16 entry table packed into a constant.  We want odd parity, so the
+	 * table is the inverse of the usual 0x6996.
+	 */
+	const unsigned odd_parity_lut = 0x9669;
+	unsigned nibble = val;
+
+	nibble ^= nibble >> 16;
+	nibble ^= nibble >> 8;
+	nibble ^= nibble >> 4;
+	nibble &= 0xf;
+
+	return (odd_parity_lut >> nibble) & 1;
+}
+
+static struct { /* one entry per distinct offset that the jump table points at */
+ uint32_t offset;
+ uint32_t num_jump_labels; /* number of used slots in jump_labels[] below */
+ uint32_t jump_labels[256]; /* jump table indices (packet ids) that map to 'offset' */
+} jump_labels[1024];
+int num_jump_labels; /* number of entries of the table above in use */
+
+/**
+ * Record that jump table slot 'n' points at instruction 'offset'.  Slots
+ * sharing an offset are collected in the same jump_labels[] entry, a new
+ * entry is appended the first time an offset is seen.
+ */
+static void add_jump_table_entry(uint32_t n, uint32_t offset)
+{
+	const int max_entries = sizeof(jump_labels) / sizeof(jump_labels[0]);
+	const uint32_t max_labels = sizeof(jump_labels[0].jump_labels) /
+			sizeof(jump_labels[0].jump_labels[0]);
+	int i;
+
+	/* the jump table has 0x80 slots (PM4 PKT3 opcodes are 7 bits), so
+	 * anything above 0x7f can't possibly be a PM4 PKT3..
+	 */
+	if (n > 0x7f)
+		return;
+
+	for (i = 0; i < num_jump_labels; i++)
+		if (jump_labels[i].offset == offset)
+			break;
+
+	if (i == num_jump_labels) {
+		/* first time we see this offset, append a new entry: */
+		assert(num_jump_labels < max_entries);
+		num_jump_labels = i + 1;
+		jump_labels[i].offset = offset;
+		jump_labels[i].num_jump_labels = 0;
+	}
+
+	/* check for room *before* storing: */
+	assert(jump_labels[i].num_jump_labels < max_labels);
+	jump_labels[i].jump_labels[jump_labels[i].num_jump_labels++] = n;
+}
+
+/**
+ * Return the index of the jump_labels[] entry recorded for the given
+ * instruction offset, or -1 if no jump table slot points there.
+ */
+static int get_jump_table_entry(uint32_t offset)
+{
+	int found = -1;
+	int idx;
+
+	for (idx = 0; idx < num_jump_labels && found < 0; idx++) {
+		if (jump_labels[idx].offset == offset)
+			found = idx;
+	}
+
+	return found;
+}
+
+static uint32_t label_offsets[0x512];
+static int num_label_offsets;
+
+/**
+ * Return the index of the branch target label at 'offset'.  If there is
+ * none yet, either register a new one (create == true) or return -1.
+ *
+ * Exits with status 1 if the label table is full.
+ */
+static int label_idx(uint32_t offset, bool create)
+{
+	const int capacity = sizeof(label_offsets) / sizeof(label_offsets[0]);
+	int i;
+
+	for (i = 0; i < num_label_offsets; i++)
+		if (offset == label_offsets[i])
+			return i;
+
+	if (!create)
+		return -1;
+
+	/* the number of branch targets is dictated by the firmware file
+	 * being disassembled, so don't trust it to fit:
+	 */
+	if (num_label_offsets >= capacity) {
+		fprintf(stderr, "too many branch targets (max %d)\n", capacity);
+		exit(1);
+	}
+
+	label_offsets[num_label_offsets] = offset;
+	return num_label_offsets++;
+}
+
+/**
+ * Return the name to print for the branch target at 'offset'.
+ *
+ * With allow_jt set, the name of a known PM4 packet whose jump table slot
+ * points at the offset is preferred.  Otherwise the name is "l%03d" of the
+ * label index, or NULL if no label was registered for the offset.  The
+ * generated name lives in a static buffer that the next call overwrites.
+ */
+static const char *
+label_name(uint32_t offset, bool allow_jt)
+{
+	static char name[8];
+	int idx;
+
+	idx = allow_jt ? get_jump_table_entry(offset) : -1;
+	if (idx >= 0) {
+		uint32_t k;
+
+		for (k = 0; k < jump_labels[idx].num_jump_labels; k++) {
+			char *pm4 = getpm4(jump_labels[idx].jump_labels[k]);
+			if (pm4)
+				return pm4;
+		}
+		/* nothing w/ a known name.. maybe we should return UNKN%d
+		 * here, to at least make it clear that this is some sort
+		 * of jump-table entry?
+		 */
+	}
+
+	idx = label_idx(offset, false);
+	if (idx >= 0) {
+		sprintf(name, "l%03d", idx);
+		return name;
+	}
+
+	return NULL;
+}
+
+
+static uint32_t fxn_offsets[0x512];
+static int num_fxn_offsets;
+
+/**
+ * Return the index of the call target ("function") at 'offset'.  If there
+ * is none yet, either register a new one (create == true) or return -1.
+ *
+ * Exits with status 1 if the function table is full.
+ */
+static int fxn_idx(uint32_t offset, bool create)
+{
+	const int capacity = sizeof(fxn_offsets) / sizeof(fxn_offsets[0]);
+	int i;
+
+	for (i = 0; i < num_fxn_offsets; i++)
+		if (offset == fxn_offsets[i])
+			return i;
+
+	if (!create)
+		return -1;
+
+	/* the number of call targets is dictated by the firmware file being
+	 * disassembled, so don't trust it to fit:
+	 */
+	if (num_fxn_offsets >= capacity) {
+		fprintf(stderr, "too many call targets (max %d)\n", capacity);
+		exit(1);
+	}
+
+	fxn_offsets[num_fxn_offsets] = offset;
+	return num_fxn_offsets++;
+}
+
+/**
+ * Return the name ("fxn%02d" of the function index) for the call target at
+ * 'offset', or NULL if no call targets that offset.  The name lives in a
+ * static buffer that the next call overwrites.
+ */
+static const char *
+fxn_name(uint32_t offset)
+{
+	static char name[8];
+	const int idx = fxn_idx(offset, false);
+
+	if (idx >= 0) {
+		sprintf(name, "fxn%02d", idx);
+		return name;
+	}
+
+	return NULL;
+}
+
+/**
+ * Print a control register operand: "@NAME" if the id is known in the
+ * control register domain, otherwise the raw id in hex.
+ */
+static void print_control_reg(uint32_t id)
+{
+	struct rnndecaddrinfo *info = NULL;
+
+	if (rnndec_checkaddr(ctx, control_regs, id, 0))
+		info = rnndec_decodeaddr(ctx, control_regs, id, 0);
+
+	/* same as print_gpu_reg(), don't assume that decoding succeeded: */
+	if (!info) {
+		printf("0x%03x", id);
+		return;
+	}
+
+	printf("@%s", info->name);
+	free(info->name);
+	free(info);
+}
+
+static void disasm(uint32_t *buf, int sizedwords) /* print a listing of the firmware in buf; NOTE(review): sizedwords is never used, so nothing read from buf is bounds-checked */
+{
+ uint32_t *instrs = buf;
+ const int jmptbl_start = instrs[1] & 0xffff; /* low 16 bits of dword 1 = index of the jump table, which is also the instruction count */
+ uint32_t *jmptbl = &buf[jmptbl_start];
+ afuc_opc opc;
+ bool rep;
+ int i;
+
+
+ /* parse jumptable: */
+ for (i = 0; i < 0x80; i++) {
+ unsigned offset = jmptbl[i];
+ unsigned n = i;// + CP_NOP;
+ add_jump_table_entry(n, offset);
+ }
+
+ /* do a pre-pass to find instructions that are potential branch targets,
+ * and add labels for them:
+ */
+ for (i = 0; i < jmptbl_start; i++) {
+ afuc_instr *instr = (void *)&instrs[i];
+
+ afuc_get_opc(instr, &opc, &rep);
+
+ switch (opc) {
+ case OPC_BRNEI:
+ case OPC_BREQI:
+ case OPC_BRNEB:
+ case OPC_BREQB:
+ label_idx(i + instr->br.ioff, true); /* branch offsets are relative to the branch itself */
+ break;
+ case OPC_PREEMPTLEAVE6:
+ if (gpuver >= 6)
+ label_idx(instr->call.uoff, true);
+ break;
+ case OPC_CALL:
+ fxn_idx(instr->call.uoff, true); /* call offsets are absolute */
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* print instructions: */
+ for (i = 0; i < jmptbl_start; i++) {
+ int jump_label_idx;
+ afuc_instr *instr = (void *)&instrs[i];
+ const char *fname, *lname;
+ afuc_opc opc;
+ bool rep;
+
+ afuc_get_opc(instr, &opc, &rep);
+
+ lname = label_name(i, false);
+ fname = fxn_name(i);
+ jump_label_idx = get_jump_table_entry(i);
+
+ if (jump_label_idx >= 0) {
+ int j;
+ printf("\n");
+ for (j = 0; j < jump_labels[jump_label_idx].num_jump_labels; j++) {
+ uint32_t jump_label = jump_labels[jump_label_idx].jump_labels[j];
+ char *name = getpm4(jump_label);
+ if (name) {
+ printlbl("%s", name);
+ } else {
+ printlbl("UNKN%d", jump_label);
+ }
+ printf(":\n");
+ }
+ }
+
+ if (fname) {
+ printlbl("%s", fname);
+ printf(":\n");
+ }
+
+ if (lname) {
+ printlbl(" %s", lname);
+ printf(":");
+ } else {
+ printf(" ");
+ }
+
+
+ if (verbose) {
+ printf("\t%04x: %08x ", i, instrs[i]);
+ } else {
+ printf(" ");
+ }
+
+ switch (opc) {
+ case OPC_NOP: {
+ /* a6xx changed the default immediate, and apparently 0
+ * is illegal now.
+ */
+ const uint32_t nop = gpuver >= 6 ? 0x1000000 : 0x0;
+ if (instrs[i] != nop) {
+ printerr("[%08x]", instrs[i]);
+ printf(" ; ");
+ }
+ if (rep)
+ printf("(rep)");
+ printf("nop");
+ print_gpu_reg(instrs[i]);
+
+ break;
+ }
+ case OPC_ADD:
+ case OPC_ADDHI:
+ case OPC_SUB:
+ case OPC_SUBHI:
+ case OPC_AND:
+ case OPC_OR:
+ case OPC_XOR:
+ case OPC_NOT:
+ case OPC_SHL:
+ case OPC_USHR:
+ case OPC_ISHR:
+ case OPC_ROT:
+ case OPC_MUL8:
+ case OPC_MIN:
+ case OPC_MAX:
+ case OPC_CMP: {
+ bool src1 = true;
+
+ if (opc == OPC_NOT)
+ src1 = false;
+
+ if (rep)
+ printf("(rep)");
+
+ print_alu_name(opc, instrs[i]);
+ print_dst(instr->alui.dst);
+ printf(", ");
+ if (src1) {
+ print_src(instr->alui.src);
+ printf(", ");
+ }
+ printf("0x%04x", instr->alui.uimm);
+ print_gpu_reg(instr->alui.uimm);
+
+ /* print out unexpected bits: */
+ if (verbose) {
+ if (instr->alui.src && !src1)
+ printerr(" (src=%02x)", instr->alui.src);
+ }
+
+ break;
+ }
+ case OPC_MOVI: {
+ if (rep)
+ printf("(rep)");
+ printf("mov ");
+ print_dst(instr->movi.dst);
+ printf(", 0x%04x", instr->movi.uimm);
+ if (instr->movi.shift)
+ printf(" << %u", instr->movi.shift);
+
+ /* using mov w/ << 16 is popular way to construct a pkt7
+ * header to send (for ex, from PFP to ME), so check that
+ * case first
+ */
+ if ((instr->movi.shift == 16) &&
+ ((instr->movi.uimm & 0xff00) == 0x7000)) {
+ unsigned opc, p;
+
+ opc = instr->movi.uimm & 0x7f;
+ p = _odd_parity_bit(opc);
+
+ /* So, you'd think that checking the parity bit would be
+ * a good way to rule out false positives, but seems like
+ * ME doesn't really care.. at least it would filter out
+ * things that look like actual legit packets between
+ * PFP and ME..
+ */
+ if (1 || p == ((instr->movi.uimm >> 7) & 0x1)) {
+ const char *name = getpm4(opc);
+ printf("\t; ");
+ if (name)
+ printlbl("%s", name);
+ else
+ printlbl("UNKN%u", opc);
+ break;
+ }
+ }
+
+ print_gpu_reg(instr->movi.uimm << instr->movi.shift);
+
+ break;
+ }
+ case OPC_ALU: {
+ bool src1 = true;
+
+ if (instr->alu.alu == OPC_NOT || instr->alu.alu == OPC_MSB)
+ src1 = false;
+
+ if (instr->alu.pad)
+ printf("[%08x] ; ", instrs[i]);
+
+ if (rep)
+ printf("(rep)");
+
+ /* special case mnemonics:
+ * reading $00 seems to always yield zero, and so:
+ * or $dst, $00, $src -> mov $dst, $src
+ * Maybe add one for negate too, ie.
+ * sub $dst, $00, $src ???
+ */
+ if ((instr->alu.alu == OPC_OR) && !instr->alu.src1) {
+ printf("mov ");
+ src1 = false;
+ } else {
+ print_alu_name(instr->alu.alu, instrs[i]);
+ }
+
+ print_dst(instr->alu.dst);
+ if (src1) {
+ printf(", ");
+ print_src(instr->alu.src1);
+ }
+ printf(", ");
+ print_src(instr->alu.src2);
+
+ /* print out unexpected bits: */
+ if (verbose) {
+ if (instr->alu.pad)
+ printerr(" (pad=%03x)", instr->alu.pad);
+ if (instr->alu.src1 && !src1)
+ printerr(" (src1=%02x)", instr->alu.src1);
+ }
+ break;
+ }
+ case OPC_CWRITE6:
+ case OPC_CREAD6:
+ case OPC_STORE6:
+ case OPC_LOAD6: {
+ if (rep)
+ printf("(rep)");
+
+ bool is_control_reg = true;
+ if (gpuver >= 6) {
+ switch (opc) {
+ case OPC_CWRITE6:
+ printf("cwrite ");
+ break;
+ case OPC_CREAD6:
+ printf("cread ");
+ break;
+ case OPC_STORE6:
+ is_control_reg = false;
+ printf("store ");
+ break;
+ case OPC_LOAD6:
+ is_control_reg = false;
+ printf("load ");
+ break;
+ default:
+ assert(!"unreachable");
+ }
+ } else {
+ switch (opc) {
+ case OPC_CWRITE5:
+ printf("cwrite ");
+ break;
+ case OPC_CREAD5:
+ printf("cread ");
+ break;
+ default:
+ fprintf(stderr, "A6xx control opcode on A5xx?\n");
+ exit(1);
+ }
+ }
+
+ print_src(instr->control.src1);
+ printf(", [");
+ print_src(instr->control.src2);
+ printf(" + ");
+ if (is_control_reg && instr->control.flags != 0x4)
+ print_control_reg(instr->control.uimm);
+ else
+ printf("0x%03x", instr->control.uimm);
+ printf("], 0x%x", instr->control.flags);
+ break;
+ }
+ case OPC_BRNEI:
+ case OPC_BREQI:
+ case OPC_BRNEB:
+ case OPC_BREQB: {
+ unsigned off = i + instr->br.ioff;
+
+ assert(!rep);
+
+ /* Since $00 reads back zero, it can be used as src for
+ * unconditional branches. (This only really makes sense
+ * for the BREQB.. or possible BRNEI if imm==0.)
+ *
+ * If bit=0 then branch is taken if *all* bits are zero.
+ * Otherwise it is taken if bit (bit-1) is clear.
+ *
+ * Note the instruction after a jump/branch is executed
+ * regardless of whether branch is taken, so use nop or
+ * take that into account in code.
+ */
+ if (instr->br.src || (opc != OPC_BRNEB)) {
+ bool immed = false;
+
+ if (opc == OPC_BRNEI) {
+ printf("brne ");
+ immed = true;
+ } else if (opc == OPC_BREQI) {
+ printf("breq ");
+ immed = true;
+ } else if (opc == OPC_BRNEB) {
+ printf("brne ");
+ } else if (opc == OPC_BREQB) {
+ printf("breq ");
+ }
+ print_src(instr->br.src);
+ if (immed) {
+ printf(", 0x%x,", instr->br.bit_or_imm);
+ } else {
+ printf(", b%u,", instr->br.bit_or_imm);
+ }
+ } else {
+ printf("jump");
+ if (verbose && instr->br.bit_or_imm) {
+ printerr(" (src=%03x, bit=%03x) ",
+ instr->br.src, instr->br.bit_or_imm);
+ }
+ }
+
+ printf(" #");
+ printlbl("%s", label_name(off, true));
+ if (verbose)
+ printf(" (#%d, %04x)", instr->br.ioff, off);
+ break;
+ }
+ case OPC_CALL:
+ assert(!rep);
+ printf("call #");
+ printlbl("%s", fxn_name(instr->call.uoff));
+ if (verbose) {
+ printf(" (%04x)", instr->call.uoff);
+ if (instr->br.bit_or_imm || instr->br.src) {
+ printerr(" (src=%03x, bit=%03x) ",
+ instr->br.src, instr->br.bit_or_imm);
+ }
+ }
+ break;
+ case OPC_RET:
+ assert(!rep);
+ if (instr->pad)
+ printf("[%08x] ; ", instrs[i]);
+ printf("ret");
+ break;
+ case OPC_WIN:
+ assert(!rep);
+ if (instr->waitin.pad)
+ printf("[%08x] ; ", instrs[i]);
+ printf("waitin");
+ if (verbose && instr->waitin.pad)
+ printerr(" (pad=%x)", instr->waitin.pad);
+ break;
+ case OPC_PREEMPTLEAVE6:
+ if (gpuver < 6) {
+ printf("[%08x] ; op38", instrs[i]);
+ }
+ printf("preemptleave #");
+ printlbl("%s", label_name(instr->call.uoff, true)); /* NOTE(review): for gpuver < 6 the pre-pass adds no label for this target, so label_name() may return NULL here */
+ break;
+ default:
+ printerr("[%08x]", instrs[i]);
+ printf(" ; op%02x ", opc);
+ print_dst(instr->alui.dst);
+ printf(", ");
+ print_src(instr->alui.src);
+ print_gpu_reg(instrs[i] & 0xffff);
+ break;
+ }
+ printf("\n");
+ }
+
+ /* print jumptable: */
+ if (verbose) {
+ printf(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n");
+ printf("; JUMP TABLE\n");
+ for (i = 0; i < 0x7f; i++) { /* NOTE(review): the table is parsed with 0x80 slots above, so slot 0x7f is never printed - confirm intent */
+ int n = i;// + CP_NOP;
+ uint32_t offset = jmptbl[i];
+ char *name = getpm4(n);
+ printf("%3d %02x: ", n, n);
+ printf("%04x", offset);
+ if (name) {
+ printf(" ; %s", name);
+ } else {
+ printf(" ; UNKN%d", n);
+ }
+ printf("\n");
+ }
+ }
+}
+
+#define CHUNKSIZE 4096
+
+/**
+ * Read the whole file at 'path' into a malloc'd buffer.
+ *
+ * On success returns the buffer (to be free()d by the caller) and stores
+ * the number of bytes read in *sz.  On failure (can't open, read error,
+ * out of memory) returns NULL and sets *sz to zero.
+ */
+static char * readfile(const char *path, int *sz)
+{
+	char *buf = NULL;
+	int fd, ret, n = 0;
+
+	*sz = 0;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	while (1) {
+		/* keep the old pointer so it is not leaked if realloc fails: */
+		char *grown = realloc(buf, n + CHUNKSIZE);
+		if (!grown) {
+			ret = -1;
+			break;
+		}
+		buf = grown;
+
+		/* a short read is not necessarily the end of the file, only
+		 * a return value of zero is:
+		 */
+		ret = read(fd, buf + n, CHUNKSIZE);
+		if (ret <= 0)
+			break;
+
+		n += ret;
+	}
+
+	close(fd);
+
+	if (ret < 0) {
+		free(buf);
+		return NULL;
+	}
+
+	*sz = n;
+	return buf;
+}
+
+/* Print the command line synopsis to stderr and exit with status 2. */
+static void usage(void)
+{
+	static const char help[] =
+		"Usage:\n"
+		"\tdisasm [-g GPUVER] [-v] [-c] filename.asm\n"
+		"\t\t-g - specify GPU version (5, etc)\n"
+		"\t\t-c - use colors\n"
+		"\t\t-v - verbose output\n";
+
+	fputs(help, stderr);
+	exit(2);
+}
+
+/**
+ * Disassembler entry point:  disasm [-g GPUVER] [-v] [-c] filename
+ *
+ * Loads the register database, reads the firmware file and prints the
+ * disassembly to stdout.  Returns non-zero if the file can't be read or is
+ * too small to be a firmware image.
+ */
+int main(int argc, char **argv)
+{
+	uint32_t *buf;
+	char *file, *control_reg_name = NULL;
+	bool colors = false;
+	int sz = 0, c;
+
+	/* Argument parsing: */
+	while ((c = getopt (argc, argv, "g:vc")) != -1) {
+		switch (c) {
+		case 'g':
+			gpuver = atoi(optarg);
+			break;
+		case 'v':
+			verbose = true;
+			break;
+		case 'c':
+			colors = true;
+			break;
+		default:
+			usage();
+		}
+	}
+
+	if (optind >= argc) {
+		fprintf(stderr, "no file specified!\n");
+		usage();
+	}
+
+	file = argv[optind];
+
+	/* if gpu version not specified, infer from filename: */
+	if (!gpuver) {
+		if (strstr(file, "a5")) {
+			gpuver = 5;
+		} else if (strstr(file, "a6")) {
+			gpuver = 6;
+		}
+	}
+
+	switch (gpuver) {
+	case 6:
+		printf("; a6xx microcode\n");
+		variant = "A6XX";
+		control_reg_name = "A6XX_CONTROL_REG";
+		break;
+	case 5:
+		printf("; a5xx microcode\n");
+		variant = "A5XX";
+		control_reg_name = "A5XX_CONTROL_REG";
+		break;
+	default:
+		fprintf(stderr, "unknown GPU version!\n");
+		usage();
+	}
+
+	rnn_init();
+	db = rnn_newdb();
+
+	ctx = rnndec_newcontext(db);
+	ctx->colors = colors ? &envy_def_colors : &envy_null_colors;
+
+	rnn_parsefile(db, "adreno.xml");
+	dom[0] = rnn_finddomain(db, variant);
+	dom[1] = rnn_finddomain(db, "AXXX");
+	control_regs = rnn_finddomain(db, control_reg_name);
+
+	buf = (uint32_t *)readfile(file, &sz);
+	if (!buf) {
+		fprintf(stderr, "could not read \"%s\"\n", file);
+		return 1;
+	}
+
+	/* we need at least the leading dword that gets skipped, the version
+	 * dword, and the dword which holds the jump table offset:
+	 */
+	if (sz < 3 * (int)sizeof(uint32_t)) {
+		fprintf(stderr, "\"%s\" is too small to be a firmware image\n", file);
+		free(buf);
+		return 1;
+	}
+
+	printf("; Disassembling microcode: %s\n", file);
+	printf("; Version: %08x\n\n", buf[1]);
+	disasm(&buf[1], sz/4 - 1);
+
+	free(buf);
+
+	return 0;
+}
diff --git a/src/freedreno/afuc/lexer.l b/src/freedreno/afuc/lexer.l
new file mode 100644
index 00000000000..aacc9473877
--- /dev/null
+++ b/src/freedreno/afuc/lexer.l
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+%{
+#include <stdlib.h>
+#include "parser.h"
+#include "asm.h"
+
+/* Store the token id in yylval.tok and evaluate to it, so that a keyword
+ * rule can "return TOKEN(T_xxx)" and the grammar action can read the id
+ * back as the token's semantic value.
+ */
+#define TOKEN(t) (yylval.tok = t)
+extern YYSTYPE yylval;
+
+%}
+
+%option noyywrap
+
+%%
+"\n"                              yylineno++;
+[ \t]                             ; /* ignore whitespace */
+";"[^\n]*                         ; /* ignore comments; the newline rule above counts the line, and a comment ending at EOF is accepted too */
+0|[1-9][0-9]*                     yylval.num = strtoul(yytext, NULL, 0);    return T_INT;   /* plain 0 is a valid decimal */
+"0x"[0-9a-fA-F]+                  yylval.num = strtoul(yytext, NULL, 0);    return T_HEX;   /* at least one hex digit required */
+
+"$"[0-9a-fA-F][0-9a-fA-F]         yylval.num = parse_reg(yytext);           return T_REGISTER;
+"$"[a-zA-Z][a-zA-Z0-9]*           yylval.num = parse_reg(yytext);           return T_REGISTER;
+"b"[0-9][0-9]*                    yylval.num = parse_bit(yytext);           return T_BIT;
+"@"[a-zA-Z_][a-zA-Z0-9_]*         yylval.num = parse_control_reg(yytext);   return T_CONTROL_REG;
+"#"[a-zA-Z_][a-zA-Z0-9_]*         yylval.str = strdup(yytext+1);            return T_LABEL_REF;  /* label reference */
+[a-zA-Z_][a-zA-Z0-9_]*":"         yylval.str = parse_label_decl(yytext);    return T_LABEL_DECL; /* label declaration */
+"["[0-9a-fA-F][0-9a-fA-F]*"]"     yylval.num = parse_literal(yytext);       return T_LITERAL;
+
+ /* instructions: */
+"nop"                             return TOKEN(T_OP_NOP);
+"add"                             return TOKEN(T_OP_ADD);
+"addhi"                           return TOKEN(T_OP_ADDHI);
+"sub"                             return TOKEN(T_OP_SUB);
+"subhi"                           return TOKEN(T_OP_SUBHI);
+"and"                             return TOKEN(T_OP_AND);
+"or"                              return TOKEN(T_OP_OR);
+"xor"                             return TOKEN(T_OP_XOR);
+"not"                             return TOKEN(T_OP_NOT);
+"shl"                             return TOKEN(T_OP_SHL);
+"ushr"                            return TOKEN(T_OP_USHR);
+"ishr"                            return TOKEN(T_OP_ISHR);
+"rot"                             return TOKEN(T_OP_ROT);
+"mul8"                            return TOKEN(T_OP_MUL8);
+"min"                             return TOKEN(T_OP_MIN);
+"max"                             return TOKEN(T_OP_MAX);
+"cmp"                             return TOKEN(T_OP_CMP);
+"msb"                             return TOKEN(T_OP_MSB);
+"mov"                             return TOKEN(T_OP_MOV);
+"cwrite"                          return TOKEN(T_OP_CWRITE);
+"cread"                           return TOKEN(T_OP_CREAD);
+"store"                           return TOKEN(T_OP_STORE);
+"load"                            return TOKEN(T_OP_LOAD);
+"brne"                            return TOKEN(T_OP_BRNE);
+"breq"                            return TOKEN(T_OP_BREQ);
+"ret"                             return TOKEN(T_OP_RET);
+"call"                            return TOKEN(T_OP_CALL);
+"jump"                            return TOKEN(T_OP_JUMP);
+"waitin"                          return TOKEN(T_OP_WAITIN);
+"preemptleave"                    return TOKEN(T_OP_PREEMPTLEAVE);
+"<<"                              return TOKEN(T_LSHIFT);
+"(rep)"                           return TOKEN(T_REP);
+
+","                               return ',';
+"["                               return '[';
+"]"                               return ']';
+"+"                               return '+';
+
+ /* anything else is a lexical error: report it and stop scanning */
+.                                 fprintf(stderr, "error at line %d: Unknown token: %s\n", yyget_lineno(), yytext); yyterminate();
+
+%%
diff --git a/src/freedreno/afuc/meson.build b/src/freedreno/afuc/meson.build
new file mode 100644
index 00000000000..8a62a33d6b8
--- /dev/null
+++ b/src/freedreno/afuc/meson.build
@@ -0,0 +1,69 @@
+# Copyright © 2020 Google, Inc
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Run bison over the assembler grammar; yields the parser source (output 0)
+# and the token/union header (output 1).
+afuc_parser = custom_target(
+  'parser.[ch]',
+  input : 'parser.y',
+  output : ['parser.c', 'parser.h'],
+  command : [
+    prog_bison,
+    '@INPUT@',
+    '--defines=@OUTPUT1@',
+    '--output=@OUTPUT0@',
+  ],
+)
+
+# Run flex over the assembler token definitions to produce the scanner source.
+afuc_lexer = custom_target(
+  'lexer.c',
+  input : 'lexer.l',
+  output : 'lexer.c',
+  command : [prog_flex, '-o', '@OUTPUT@', '@INPUT@'],
+)
+
+# Assembler: hand-written driver plus the generated scanner and parser.
+# Only built by default when the 'freedreno' tools are enabled; never installed.
+asm = executable(
+  'asm',
+  ['asm.c', afuc_lexer, afuc_parser],
+  include_directories : [inc_freedreno_rnn],
+  link_with : [libfreedreno_rnn],
+  dependencies : [],
+  build_by_default : with_tools.contains('freedreno'),
+  install : false,
+)
+
+# Disassembler: a single source file linked against the rnn library.
+# Only built by default when the 'freedreno' tools are enabled; never installed.
+disasm = executable(
+  'disasm',
+  'disasm.c',
+  include_directories : [inc_freedreno_rnn],
+  link_with : [libfreedreno_rnn],
+  dependencies : [],
+  build_by_default : with_tools.contains('freedreno'),
+  install : false,
+)
diff --git a/src/freedreno/afuc/parser.y b/src/freedreno/afuc/parser.y
new file mode 100644
index 00000000000..9f82286692c
--- /dev/null
+++ b/src/freedreno/afuc/parser.y
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+%{
+#define YYDEBUG 0
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "asm.h"
+
+/* Line-number accessor used by yyerror(); not defined in this file (presumably supplied by the generated scanner). */
+int yyget_lineno(void);
+/* NOTE(review): YYDEBUG is #defined (to 0) at the top of this prologue, so the #ifdef below is always true. */
+#ifdef YYDEBUG
+int yydebug;
+#endif
+/* Scanner interface; the buffer-state handle is treated as an opaque pointer here. */
+extern int yylex(void);
+typedef void *YY_BUFFER_STATE;
+extern YY_BUFFER_STATE yy_scan_string(const char *);
+extern void yy_delete_buffer(YY_BUFFER_STATE);
+/* Parser entry point, declared ahead of its definition. */
+int yyparse(void);
+
+/*
+ * Parser error callback: writes the message to stderr, prefixed with the
+ * line number reported by yyget_lineno().
+ */
+void yyerror(const char *msg);
+void yyerror(const char *msg)
+{
+	const int line = yyget_lineno();
+
+	fprintf(stderr, "error at line %d: %s\n", line, msg);
+}
+
+/* Instruction currently being filled in by the grammar actions. */
+static struct asm_instruction *instr;
+
+/* Make the instruction returned by next_instr(tok) the current one. */
+static void new_instr(int tok)
+{
+	instr = next_instr(tok);
+}
+
+/* Set the destination field of the current instruction. */
+static void dst(int reg)
+{
+	instr->dst = reg;
+}
+
+/* Set the first source field of the current instruction. */
+static void src1(int reg)
+{
+	instr->src1 = reg;
+}
+
+/* Set the second source field of the current instruction. */
+static void src2(int reg)
+{
+	instr->src2 = reg;
+}
+
+/* Attach an immediate operand and mark it as present. */
+static void immed(int value)
+{
+	instr->has_immed = true;
+	instr->immed = value;
+}
+
+/* Attach a shift amount and mark it as present. */
+static void shift(int amount)
+{
+	instr->has_shift = true;
+	instr->shift = amount;
+}
+
+/* Attach a bit operand and mark it as present. */
+static void bit(int value)
+{
+	instr->has_bit = true;
+	instr->bit = value;
+}
+
+/* Turn the current instruction into a raw literal dword. */
+static void literal(uint32_t dword)
+{
+	instr->is_literal = true;
+	instr->literal = dword;
+}
+
+/* Record the label name the current instruction refers to (the pointer is stored, not copied). */
+static void label(const char *name)
+{
+	instr->label = name;
+}
+
+%}
+/* Semantic value carried by tokens and non-terminals; the member used is selected by the <tag> in the declarations below. */
+%union {
+ int tok; /* token id, for the <tok> keyword/opcode tokens */
+ uint32_t num; /* numeric payload, for the <num> tokens and the reg/immediate non-terminals */
+ const char *str; /* label name, for T_LABEL_DECL and T_LABEL_REF */
+}
+
+%{
+/*
+ * Token printer hooked up through the YYPRINT macro below.  Only the
+ * numeric token type is written to 'file'; the semantic value is ignored.
+ */
+static void print_token(FILE *file, int type, YYSTYPE value)
+{
+	(void)value;	/* intentionally unused */
+	fprintf(file, "\ntype: %d\n", type);
+}
+
+#define YYPRINT(file, type, value) print_token(file, type, value)
+%}
+/* Tokens that carry a payload: numbers in .num, label names in .str. */
+%token <num> T_INT
+%token <num> T_HEX
+%token <num> T_CONTROL_REG
+%token <str> T_LABEL_DECL
+%token <str> T_LABEL_REF
+%token <num> T_LITERAL
+%token <num> T_BIT
+%token <num> T_REGISTER
+/* Keyword tokens: the semantic value is the token id itself (.tok), which the actions pass to new_instr(). */
+%token <tok> T_OP_NOP
+%token <tok> T_OP_ADD
+%token <tok> T_OP_ADDHI
+%token <tok> T_OP_SUB
+%token <tok> T_OP_SUBHI
+%token <tok> T_OP_AND
+%token <tok> T_OP_OR
+%token <tok> T_OP_XOR
+%token <tok> T_OP_NOT
+%token <tok> T_OP_SHL
+%token <tok> T_OP_USHR
+%token <tok> T_OP_ISHR
+%token <tok> T_OP_ROT
+%token <tok> T_OP_MUL8
+%token <tok> T_OP_MIN
+%token <tok> T_OP_MAX
+%token <tok> T_OP_CMP
+%token <tok> T_OP_MSB
+%token <tok> T_OP_MOV
+%token <tok> T_OP_CWRITE
+%token <tok> T_OP_CREAD
+%token <tok> T_OP_STORE
+%token <tok> T_OP_LOAD
+%token <tok> T_OP_BRNE
+%token <tok> T_OP_BREQ
+%token <tok> T_OP_RET
+%token <tok> T_OP_CALL
+%token <tok> T_OP_JUMP
+%token <tok> T_OP_WAITIN
+%token <tok> T_OP_PREEMPTLEAVE
+%token <tok> T_LSHIFT
+%token <tok> T_REP
+/* Non-terminals that produce a numeric value. */
+%type <num> reg
+%type <num> immediate
+/* NOTE(review): newer Bison releases deprecate %error-verbose in favour of "%define parse.error verbose" - confirm the minimum supported Bison before switching. */
+%error-verbose
+/* The grammar's start symbol is the instruction list. */
+%start instrs
+
+%%
+
+/*
+ * A program is a non-empty sequence of instructions and label declarations.
+ *
+ * The list is left-recursive: every item is folded into 'instrs' as soon as
+ * it has been parsed, so the parser stack does not grow with the length of
+ * the input.  (A right-recursive list keeps every item on the stack until
+ * the last one has been read.)  The per-item actions still run in input
+ * order.
+ */
+instrs:            instr_or_label
+|                  instrs instr_or_label
+/* One top-level item: an instruction, optionally preceded by T_REP, or a label declaration. */
+instr_or_label: instr_r
+| T_REP instr_r { instr->rep = true; } /* instr is the instruction instr_r just created */
+| branch_instr
+| other_instr
+| T_LABEL_DECL { decl_label($1); }
+
+/* instructions that can optionally have (rep) flag: */
+instr_r: alu_instr
+| config_instr
+
+/* These ALU instructions do not fit the generic two-source form and get
+ * their own rules:
+ * - not (single src, possibly an immediate)
+ * - msb (single src, must be reg)
+ * - mov (single src, plus possibly a shift)
+ * Note that the register source of msb/not is recorded with src2(), while
+ * mov records it with src1().
+ */
+
+alu_msb_instr: T_OP_MSB reg ',' reg { new_instr($1); dst($2); src2($4); }
+
+alu_not_instr: T_OP_NOT reg ',' reg { new_instr($1); dst($2); src2($4); }
+| T_OP_NOT reg ',' immediate { new_instr($1); dst($2); immed($4); }
+
+alu_mov_instr: T_OP_MOV reg ',' reg { new_instr($1); dst($2); src1($4); }
+| T_OP_MOV reg ',' immediate T_LSHIFT immediate {
+ new_instr($1); dst($2); immed($4); shift($6);
+}
+| T_OP_MOV reg ',' immediate { new_instr($1); dst($2); immed($4); }
+| T_OP_MOV reg ',' T_LABEL_REF T_LSHIFT immediate {
+ new_instr($1); dst($2); label($4); shift($6);
+}
+| T_OP_MOV reg ',' T_LABEL_REF { new_instr($1); dst($2); label($4); }
+/* Generic two-source ALU opcodes.  The instruction is created as soon as
+ * the mnemonic is reduced, so the operand actions in alu_2src_instr fill
+ * in that same instruction.
+ */
+alu_2src_op: T_OP_ADD { new_instr($1); }
+| T_OP_ADDHI { new_instr($1); }
+| T_OP_SUB { new_instr($1); }
+| T_OP_SUBHI { new_instr($1); }
+| T_OP_AND { new_instr($1); }
+| T_OP_OR { new_instr($1); }
+| T_OP_XOR { new_instr($1); }
+| T_OP_SHL { new_instr($1); }
+| T_OP_USHR { new_instr($1); }
+| T_OP_ISHR { new_instr($1); }
+| T_OP_ROT { new_instr($1); }
+| T_OP_MUL8 { new_instr($1); }
+| T_OP_MIN { new_instr($1); }
+| T_OP_MAX { new_instr($1); }
+| T_OP_CMP { new_instr($1); }
+/* Operands: destination, first source, then either a register (src2) or an immediate. */
+alu_2src_instr: alu_2src_op reg ',' reg ',' reg { dst($2); src1($4); src2($6); }
+| alu_2src_op reg ',' reg ',' immediate { dst($2); src1($4); immed($6); }
+/* Any ALU instruction: the generic form or one of the special cases. */
+alu_instr: alu_2src_instr
+| alu_msb_instr
+| alu_not_instr
+| alu_mov_instr
+/* Opcodes that share the "reg, [reg + immediate], immediate" operand form. */
+config_op: T_OP_CWRITE { new_instr($1); }
+| T_OP_CREAD { new_instr($1); }
+| T_OP_LOAD { new_instr($1); }
+| T_OP_STORE { new_instr($1); }
+/* First register -> src1, bracketed register -> src2, bracketed offset -> immed, trailing immediate -> bit. */
+config_instr: config_op reg ',' '[' reg '+' immediate ']' ',' immediate {
+ src1($2); src2($5); immed($7); bit($10);
+}
+/* Conditional branch opcodes; the instruction is created when the mnemonic is reduced. */
+branch_op: T_OP_BRNE { new_instr($1); }
+| T_OP_BREQ { new_instr($1); }
+/* Operands: a register, then either a T_BIT (stored via bit()) or an immediate (stored via immed()), then the target label. */
+branch_instr: branch_op reg ',' T_BIT ',' T_LABEL_REF { src1($2); bit($4); label($6); }
+| branch_op reg ',' immediate ',' T_LABEL_REF { src1($2); immed($4); label($6); }
+/* Remaining instructions: label-only operands, no operands, or a raw literal. */
+other_instr: T_OP_CALL T_LABEL_REF { new_instr($1); label($2); }
+| T_OP_PREEMPTLEAVE T_LABEL_REF { new_instr($1); label($2); }
+| T_OP_RET { new_instr($1); }
+| T_OP_JUMP T_LABEL_REF { new_instr($1); label($2); }
+| T_OP_WAITIN { new_instr($1); }
+| T_OP_NOP { new_instr($1); }
+| T_LITERAL { new_instr($1); literal($1); } /* NOTE(review): $1 here is the literal's value, not a token id, yet it is also passed to new_instr() - confirm the encoder ignores the token for literals */
+/* Operand non-terminals; with no explicit action the value of the single token is passed through. */
+reg: T_REGISTER
+/* An immediate is a hex or decimal number, or a control register value optionally offset by a further immediate. */
+immediate: T_HEX
+| T_INT
+| T_CONTROL_REG
+| T_CONTROL_REG '+' immediate { $$ = $1 + $3; } /* offset is added to the control register value */
+
diff --git a/src/freedreno/meson.build b/src/freedreno/meson.build
index 6405a7d51dc..3df6dfb16c6 100644
--- a/src/freedreno/meson.build
+++ b/src/freedreno/meson.build
@@ -35,6 +35,7 @@ dep_libxml2 = dependency('libxml-2.0', required: false)
if dep_libxml2.found()
subdir('rnn')
subdir('decode')
+ subdir('afuc')
endif
if with_tools.contains('drm-shim')