summary refs log tree commit diff
path: root/src/freedreno/decode/crashdec.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/freedreno/decode/crashdec.c')
-rw-r--r-- src/freedreno/decode/crashdec.c 537
1 files changed, 188 insertions, 349 deletions
diff --git a/src/freedreno/decode/crashdec.c b/src/freedreno/decode/crashdec.c
index 5e54f7fa060..35316acf320 100644
--- a/src/freedreno/decode/crashdec.c
+++ b/src/freedreno/decode/crashdec.c
@@ -36,54 +36,23 @@
* or times out after 5min)
*/
-#include <assert.h>
-#include <getopt.h>
-#include <inttypes.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "freedreno_pm4.h"
-
-#include "ir3/instr-a3xx.h"
-#include "buffers.h"
-#include "cffdec.h"
-#include "disasm.h"
-#include "pager.h"
-#include "rnnutil.h"
-#include "util.h"
+
+#include "crashdec.h"
static FILE *in;
-static bool verbose;
+bool verbose;
+
+struct rnn *rnn_gmu;
+struct rnn *rnn_control;
+struct rnn *rnn_pipe;
-static struct rnn *rnn_gmu;
-static struct rnn *rnn_control;
-static struct rnn *rnn_pipe;
+static uint64_t fault_iova;
+static bool has_fault_iova;
-static struct cffdec_options options = {
+struct cffdec_options options = {
.draw_filter = -1,
};
-static inline bool
-is_a6xx(void)
-{
- return (600 <= options.gpu_id) && (options.gpu_id < 700);
-}
-static inline bool
-is_a5xx(void)
-{
- return (500 <= options.gpu_id) && (options.gpu_id < 600);
-}
-static inline bool
-is_64b(void)
-{
- return options.gpu_id >= 500;
-}
-
/*
* Helpers to read register values:
*/
@@ -282,10 +251,84 @@ decode_ringbuffer(void)
}
}
+/*
+ * Decode GMU log
+ *
+ * Walks the lines of the current section. The 'iova:' and 'size:' lines
+ * are parsed and echoed; the line that announces the ascii85 payload is
+ * not echoed, instead the payload is decoded and printed as a hex/ascii
+ * dump. Any other line is echoed to stdout unchanged.
+ */
+
+static void
+decode_gmu_log(void)
+{
+ /* Zero-initialized so that a 'data:' line which is not preceded by a
+ * 'size:' line never reads an indeterminate value:
+ */
+ uint64_t iova = 0;
+ uint32_t size = 0;
+
+ foreach_line_in_section (line) {
+ if (startswith(line, " iova:")) {
+ parseline(line, " iova: %" PRIx64, &iova);
+ } else if (startswith(line, " size:")) {
+ parseline(line, " size: %u", &size);
+ } else if (startswith(line, " data: !!ascii85 |")) {
+ /* popline_ascii85() is given size / 4 while dump_hex_ascii() is
+ * given size (presumably dwords vs. bytes -- TODO confirm):
+ */
+ void *buf = popline_ascii85(size / 4);
+
+ dump_hex_ascii(buf, size, 1);
+
+ free(buf);
+
+ /* The payload replaces the 'data:' marker line in the output: */
+ continue;
+ }
+
+ printf("%s", line);
+ }
+}
+
+/*
+ * Decode HFI queues
+ *
+ * Walks the lines of the current section and collects them into a local
+ * a6xx_hfi_state: 'iova:' and 'size:' fill hfi.iova / hfi.size, each
+ * 'queue-history[N]:' line fills the eight entries of hfi.history[N], and
+ * the ascii85 payload is decoded into hfi.buf and passed to dump_gmu_hfi().
+ * Every line except the one announcing the ascii85 payload is echoed to
+ * stdout.
+ */
+
+static void
+decode_gmu_hfi(void)
+{
+ struct a6xx_hfi_state hfi = {};
+
+ /* Initialize the history buffers with invalid entries (-1): */
+ memset(&hfi.history, 0xff, sizeof(hfi.history));
+
+ foreach_line_in_section (line) {
+ if (startswith(line, " iova:")) {
+ parseline(line, " iova: %" PRIx64, &hfi.iova);
+ } else if (startswith(line, " size:")) {
+ parseline(line, " size: %u", &hfi.size);
+ } else if (startswith(line, " queue-history")) {
+ unsigned qidx, dummy;
+
+ parseline(line, " queue-history[%u]:", &qidx); /* first pass: only the queue index, so it can be range-checked */
+ assert(qidx < ARRAY_SIZE(hfi.history));
+
+ parseline(line, " queue-history[%u]: %d %d %d %d %d %d %d %d", &dummy, /* second pass: index re-read into 'dummy', then the eight history entries */
+ &hfi.history[qidx][0], &hfi.history[qidx][1],
+ &hfi.history[qidx][2], &hfi.history[qidx][3],
+ &hfi.history[qidx][4], &hfi.history[qidx][5],
+ &hfi.history[qidx][6], &hfi.history[qidx][7]);
+ } else if (startswith(line, " data: !!ascii85 |")) {
+ hfi.buf = popline_ascii85(hfi.size / 4); /* NOTE(review): size / 4 is presumably a dword count -- confirm against popline_ascii85() */
+
+ if (verbose)
+ dump_hex_ascii(hfi.buf, hfi.size, 1); /* raw dump only in verbose mode */
+
+ dump_gmu_hfi(&hfi);
+
+ free(hfi.buf);
+
+ continue; /* do not echo the 'data:' marker line (assumes the macro expands to a loop -- confirm) */
+ }
+
+ printf("%s", line);
+ }
+}
+
static bool
valid_header(uint32_t pkt)
{
- if (options.gpu_id >= 500) {
+ if (options.info->chip >= 5) {
return pkt_is_type4(pkt) || pkt_is_type7(pkt);
} else {
/* TODO maybe we can check validish looking pkt3 opc or pkt0
@@ -305,9 +348,11 @@ dump_cmdstream(void)
printf("got rb_base=%" PRIx64 "\n", rb_base);
options.ibs[1].base = regval64("CP_IB1_BASE");
- options.ibs[1].rem = regval("CP_IB1_REM_SIZE");
+ if (is_a6xx())
+ options.ibs[1].rem = regval("CP_IB1_REM_SIZE");
options.ibs[2].base = regval64("CP_IB2_BASE");
- options.ibs[2].rem = regval("CP_IB2_REM_SIZE");
+ if (is_a6xx())
+ options.ibs[2].rem = regval("CP_IB2_REM_SIZE");
/* Adjust remaining size to account for cmdstream slurped into ROQ
* but not yet consumed by SQE
@@ -319,8 +364,10 @@ dump_cmdstream(void)
* by name rather than hard-coding this.
*/
if (is_a6xx()) {
- options.ibs[1].rem += regval("CP_CSQ_IB1_STAT") >> 16;
- options.ibs[2].rem += regval("CP_CSQ_IB2_STAT") >> 16;
+ uint32_t ib1_rem = regval("CP_ROQ_AVAIL_IB1") >> 16;
+ uint32_t ib2_rem = regval("CP_ROQ_AVAIL_IB2") >> 16;
+ options.ibs[1].rem += ib1_rem ? ib1_rem - 1 : 0;
+ options.ibs[2].rem += ib2_rem ? ib2_rem - 1 : 0;
}
printf("IB1: %" PRIx64 ", %u\n", options.ibs[1].base, options.ibs[1].rem);
@@ -346,6 +393,7 @@ dump_cmdstream(void)
unsigned ringszdw = ringbuffers[id].size >> 2; /* in dwords */
if (verbose) {
+ handle_prefetch(ringbuffers[id].buf, ringszdw);
dump_commands(ringbuffers[id].buf, ringszdw, 0);
return;
}
@@ -378,12 +426,31 @@ dump_cmdstream(void)
buf[idx] = ringbuffers[id].buf[p];
}
+ handle_prefetch(buf, cmdszdw);
dump_commands(buf, cmdszdw, 0);
free(buf);
}
}
/*
+ * Decode optional 'fault-info' section. We only get this section if
+ * the devcoredump was triggered by an iova fault:
+ *
+ * The address parsed from the ' - far:' line is stored in fault_iova and
+ * has_fault_iova is set; decode_bos() checks both when it annotates the
+ * buffer address ranges. Every line of the section is echoed to stdout
+ * unchanged.
+ */
+
+static void
+decode_fault_info(void)
+{
+ foreach_line_in_section (line) {
+ if (startswith(line, " - far:")) {
+ parseline(line, " - far: %" PRIx64, &fault_iova);
+ has_fault_iova = true; /* set unconditionally once a ' - far:' line has been seen */
+ }
+
+ printf("%s", line);
+ }
+}
+
+/*
* Decode 'bos' (buffers) section:
*/
@@ -396,8 +463,32 @@ decode_bos(void)
foreach_line_in_section (line) {
if (startswith(line, " - iova:")) {
parseline(line, " - iova: %" PRIx64, &iova);
+ continue;
} else if (startswith(line, " size:")) {
parseline(line, " size: %u", &size);
+
+ /*
+ * This is a bit convoluted, vs just printing the lines as
+ * they come. But we want to have both the iova and size
+ * so we can print the end address of the buffer
+ */
+
+ uint64_t end = iova + size;
+
+ printf(" - iova: 0x%016" PRIx64 "-0x%016" PRIx64, iova, end);
+
+ if (has_fault_iova) {
+ if ((iova <= fault_iova) && (fault_iova < end)) {
+ /* Fault address was within what should be a mapped buffer!! */
+ printf("\t==");
+ } else if ((iova <= fault_iova) && (fault_iova < (end + size))) {
+ /* Fault address was near this mapped buffer */
+ printf("\t>=");
+ }
+ }
+ printf("\n");
+ printf(" size: %u (0x%x)\n", size, size);
+ continue;
} else if (startswith(line, " data: !!ascii85 |")) {
uint32_t *buf = popline_ascii85(size / 4);
@@ -417,42 +508,51 @@ decode_bos(void)
* Decode registers section:
*/
-static void
-dump_register(struct rnn *rnn, uint32_t offset, uint32_t value)
+void
+dump_register(struct regacc *r) /* print "<name>: <value>" for the register described by r (r->rnn, r->regbase, r->value) */
{
- struct rnndecaddrinfo *info = rnn_reginfo(rnn, offset);
+ struct rnndecaddrinfo *info = rnn_reginfo(r->rnn, r->regbase);
if (info && info->typeinfo) {
- char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
+ char *decoded = rnndec_decodeval(r->rnn->vc, info->typeinfo, r->value); /* NOTE(review): 'decoded' is never freed in this function -- confirm who owns the string returned by rnndec_decodeval() */
printf("%s: %s\n", info->name, decoded);
} else if (info) {
- printf("%s: %08x\n", info->name, value);
+ printf("%s: %08"PRIx64"\n", info->name, r->value); /* known name but no type info: raw hex value */
} else {
- printf("<%04x>: %08x\n", offset, value);
+ printf("<%04x>: %08"PRIx64"\n", r->regbase, r->value); /* no info for this register: print its base offset in place of a name */
}
+ rnn_reginfo_free(info); /* reached on every path, including info == NULL -- presumably NULL-safe, confirm */
}
static void
decode_gmu_registers(void) /* decode the section's "{ offset, value }" lines against the rnn_gmu register database */
{
+ struct regacc r = regacc(rnn_gmu);
+
foreach_line_in_section (line) {
uint32_t offset, value;
parseline(line, " - { offset: %x, value: %x }", &offset, &value);
- printf("\t%08x\t", value);
- dump_register(rnn_gmu, offset / 4, value);
+ if (regacc_push(&r, offset / 4, value)) { /* print only when regacc_push() returns true; offset / 4 is presumably a byte-to-dword conversion -- confirm */
+ printf("\t%08"PRIx64"\t", r.value); /* value taken from the accumulator and printed as 64-bit hex */
+ dump_register(&r);
+ }
}
}
static void
decode_registers(void) /* record and print the section's "{ offset, value }" register lines */
{
+ struct regacc r = regacc(NULL);
+
foreach_line_in_section (line) {
uint32_t offset, value;
parseline(line, " - { offset: %x, value: %x }", &offset, &value);
reg_set(offset / 4, value); /* every parsed value is stored via reg_set(), regardless of what regacc_push() returns below */
- printf("\t%08x", value);
- dump_register_val(offset / 4, value, 0);
+ if (regacc_push(&r, offset / 4, value)) { /* print only when regacc_push() returns true */
+ printf("\t%08"PRIx64, r.value); /* value taken from the accumulator and printed as 64-bit hex */
+ dump_register_val(&r, 0);
+ }
}
}
@@ -460,6 +560,8 @@ decode_registers(void)
static void
decode_clusters(void)
{
+ struct regacc r = regacc(NULL);
+
foreach_line_in_section (line) {
if (startswith(line, " - cluster-name:") ||
startswith(line, " - context:")) {
@@ -470,8 +572,10 @@ decode_clusters(void)
uint32_t offset, value;
parseline(line, " - { offset: %x, value: %x }", &offset, &value);
- printf("\t%08x", value);
- dump_register_val(offset / 4, value, 0);
+ if (regacc_push(&r, offset / 4, value)) {
+ printf("\t%08"PRIx64, r.value);
+ dump_register_val(&r, 0);
+ }
}
}
@@ -509,14 +613,18 @@ dump_control_regs(uint32_t *regs)
if (!rnn_control)
return;
+ struct regacc r = regacc(rnn_control);
+
/* Control regs 0x100-0x17f are a scratch space to be used by the
* firmware however it wants, unlike lower regs which involve some
* fixed-function units. Therefore only these registers get dumped
* directly.
*/
for (uint32_t i = 0; i < 0x80; i++) {
- printf("\t%08x\t", regs[i]);
- dump_register(rnn_control, i + 0x100, regs[i]);
+ if (regacc_push(&r, i + 0x100, regs[i])) {
+ printf("\t%08"PRIx64"\t", r.value);
+ dump_register(&r);
+ }
}
}
@@ -564,292 +672,6 @@ dump_cp_ucode_dbg(uint32_t *dbg)
}
static void
-dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context,
- bool pipe)
-{
- if (pipe) {
- struct rnndecaddrinfo *info = rnn_reginfo(rnn_pipe, reg);
- printf("\t\twrite %s (%02x) pipe\n", info->name, reg);
-
- if (!strcmp(info->typeinfo->name, "void")) {
- /* registers that ignore their payload */
- } else {
- printf("\t\t\t");
- dump_register(rnn_pipe, reg, data);
- }
- } else {
- printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context);
- dump_register_val(reg, data, 2);
- }
-}
-
-static void
-dump_mem_pool_chunk(const uint32_t *chunk)
-{
- struct __attribute__((packed)) {
- bool reg0_enabled : 1;
- bool reg1_enabled : 1;
- uint32_t data0 : 32;
- uint32_t data1 : 32;
- uint32_t reg0 : 18;
- uint32_t reg1 : 18;
- bool reg0_pipe : 1;
- bool reg1_pipe : 1;
- uint32_t reg0_context : 1;
- uint32_t reg1_context : 1;
- uint32_t padding : 22;
- } fields;
-
- memcpy(&fields, chunk, 4 * sizeof(uint32_t));
-
- if (fields.reg0_enabled) {
- dump_mem_pool_reg_write(fields.reg0, fields.data0, fields.reg0_context,
- fields.reg0_pipe);
- }
-
- if (fields.reg1_enabled) {
- dump_mem_pool_reg_write(fields.reg1, fields.data1, fields.reg1_context,
- fields.reg1_pipe);
- }
-}
-
-static void
-dump_cp_mem_pool(uint32_t *mempool)
-{
- /* The mem pool is a shared pool of memory used for storing in-flight
- * register writes. There are 6 different queues, one for each
- * cluster. Writing to $data (or for some special registers, $addr)
- * pushes data onto the appropriate queue, and each queue is pulled
- * from by the appropriate cluster. The queues are thus written to
- * in-order, but may be read out-of-order.
- *
- * The queues are conceptually divided into 128-bit "chunks", and the
- * read and write pointers are in units of chunks. These chunks are
- * organized internally into 8-chunk "blocks", and memory is allocated
- * dynamically in terms of blocks. Each queue is represented as a
- * singly-linked list of blocks, as well as 3-bit start/end chunk
- * pointers that point within the first/last block. The next pointers
- * are located in a separate array, rather than inline.
- */
-
- /* TODO: The firmware CP_MEM_POOL save/restore routines do something
- * like:
- *
- * cread $02, [ $00 + 0 ]
- * and $02, $02, 0x118
- * ...
- * brne $02, 0, #label
- * mov $03, 0x2000
- * mov $03, 0x1000
- * label:
- * ...
- *
- * I think that control register 0 is the GPU version, and some
- * versions have a smaller mem pool. It seems some models have a mem
- * pool that's half the size, and a bunch of offsets are shifted
- * accordingly. Unfortunately the kernel driver's dumping code doesn't
- * seem to take this into account, even the downstream android driver,
- * and we don't know which versions 0x8, 0x10, or 0x100 correspond
- * to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
- */
- bool small_mem_pool = false;
-
- /* The array of next pointers for each block. */
- const uint32_t *next_pointers =
- small_mem_pool ? &mempool[0x800] : &mempool[0x1000];
-
- /* Maximum number of blocks in the pool, also the size of the pointers
- * array.
- */
- const int num_blocks = small_mem_pool ? 0x30 : 0x80;
-
- /* Number of queues */
- const unsigned num_queues = 6;
-
- /* Unfortunately the per-queue state is a little more complicated than
- * a simple pair of begin/end pointers. Instead of a single beginning
- * block, there are *two*, with the property that either the two are
- * equal or the second is the "next" of the first. Similarly there are
- * two end blocks. Thus the queue either looks like this:
- *
- * A -> B -> ... -> C -> D
- *
- * Or like this, or some combination:
- *
- * A/B -> ... -> C/D
- *
- * However, there's only one beginning/end chunk offset. Now the
- * question is, which of A or B is the actual start? I.e. is the chunk
- * offset an offset inside A or B? It depends. I'll show a typical read
- * cycle, starting here (read pointer marked with a *) with a chunk
- * offset of 0:
- *
- * A B
- * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
- * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
- *
- * Once the pointer advances far enough, the hardware decides to free
- * A, after which the read-side state looks like:
- *
- * (free) A/B
- * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
- * |_|_|_|_|_|_|_|_| |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
- *
- * Then after advancing the pointer a bit more, the hardware fetches
- * the "next" pointer for A and stores it in B:
- *
- * (free) A B
- * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
- * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
- *
- * Then the read pointer advances into B, at which point we've come
- * back to the first state having advanced a whole block:
- *
- * (free) A B
- * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
- * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
- *
- *
- * There is a similar cycle for the write pointer. Now, the question
- * is, how do we know which state we're in? We need to know this to
- * know whether the pointer (*) is in A or B if they're different. It
- * seems like there should be some bit somewhere describing this, but
- * after lots of experimentation I've come up empty-handed. For now we
- * assume that if the pointer is in the first half, then we're in
- * either the first or second state and use B, and otherwise we're in
- * the second or third state and use A. So far I haven't seen anything
- * that violates this assumption.
- */
-
- struct {
- uint32_t unk0;
- uint32_t padding0[7]; /* Mirrors of unk0 */
-
- struct {
- uint32_t chunk : 3;
- uint32_t first_block : 32 - 3;
- } writer[6];
- uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */
-
- uint32_t unk1;
- uint32_t padding2[7]; /* Mirrors of unk1 */
-
- uint32_t writer_second_block[6];
- uint32_t padding3[2];
-
- uint32_t unk2[6];
- uint32_t padding4[2];
-
- struct {
- uint32_t chunk : 3;
- uint32_t first_block : 32 - 3;
- } reader[6];
- uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */
-
- uint32_t unk3;
- uint32_t padding6[7]; /* Mirrors of unk3 */
-
- uint32_t reader_second_block[6];
- uint32_t padding7[2];
-
- uint32_t block_count[6];
- uint32_t padding[2];
-
- uint32_t unk4;
- uint32_t padding9[7]; /* Mirrors of unk4 */
- } data1;
-
- const uint32_t *data1_ptr =
- small_mem_pool ? &mempool[0xc00] : &mempool[0x1800];
- memcpy(&data1, data1_ptr, sizeof(data1));
-
- /* Based on the kernel, the first dword is the mem pool size (in
- * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
- */
- const uint32_t *data2_ptr =
- small_mem_pool ? &mempool[0x1000] : &mempool[0x2000];
- const int data2_size = 0x60;
-
- /* This seems to be the size of each queue in chunks. */
- const uint32_t *queue_sizes = &data2_ptr[0x18];
-
- printf("\tdata2:\n");
- dump_hex_ascii(data2_ptr, 4 * data2_size, 1);
-
- /* These seem to be some kind of counter of allocated/deallocated blocks */
- if (verbose) {
- printf("\tunk0: %x\n", data1.unk0);
- printf("\tunk1: %x\n", data1.unk1);
- printf("\tunk3: %x\n", data1.unk3);
- printf("\tunk4: %x\n\n", data1.unk4);
- }
-
- for (int queue = 0; queue < num_queues; queue++) {
- const char *cluster_names[6] = {"FE", "SP_VS", "PC_VS",
- "GRAS", "SP_PS", "PS"};
- printf("\tCLUSTER_%s:\n\n", cluster_names[queue]);
-
- if (verbose) {
- printf("\t\twriter_first_block: 0x%x\n",
- data1.writer[queue].first_block);
- printf("\t\twriter_second_block: 0x%x\n",
- data1.writer_second_block[queue]);
- printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk);
- printf("\t\treader_first_block: 0x%x\n",
- data1.reader[queue].first_block);
- printf("\t\treader_second_block: 0x%x\n",
- data1.reader_second_block[queue]);
- printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk);
- printf("\t\tblock_count: %d\n", data1.block_count[queue]);
- printf("\t\tunk2: 0x%x\n", data1.unk2[queue]);
- printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]);
- }
-
- uint32_t cur_chunk = data1.reader[queue].chunk;
- uint32_t cur_block = cur_chunk > 3 ? data1.reader[queue].first_block
- : data1.reader_second_block[queue];
- uint32_t last_chunk = data1.writer[queue].chunk;
- uint32_t last_block = last_chunk > 3 ? data1.writer[queue].first_block
- : data1.writer_second_block[queue];
-
- if (verbose)
- printf("\tblock %x\n", cur_block);
- if (cur_block >= num_blocks) {
- fprintf(stderr, "block %x too large\n", cur_block);
- exit(1);
- }
- unsigned calculated_queue_size = 0;
- while (cur_block != last_block || cur_chunk != last_chunk) {
- calculated_queue_size++;
- uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4];
-
- dump_mem_pool_chunk(chunk_ptr);
-
- printf("\t%05x: %08x %08x %08x %08x\n",
- 4 * (cur_block * 0x20 + cur_chunk + 4), chunk_ptr[0],
- chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]);
-
- cur_chunk++;
- if (cur_chunk == 8) {
- cur_block = next_pointers[cur_block];
- if (verbose)
- printf("\tblock %x\n", cur_block);
- if (cur_block >= num_blocks) {
- fprintf(stderr, "block %x too large\n", cur_block);
- exit(1);
- }
- cur_chunk = 0;
- }
- }
- if (calculated_queue_size != queue_sizes[queue]) {
- printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n",
- calculated_queue_size);
- }
- printf("\n");
- }
-}
-
-static void
decode_indexed_registers(void)
{
char *name = NULL;
@@ -923,7 +745,7 @@ decode_shader_blocks(void)
* (or parts of shaders?), so perhaps we should search
* for ends of shaders and decode each?
*/
- try_disasm_a3xx(buf, sizedwords, 1, stdout, options.gpu_id);
+ try_disasm_a3xx(buf, sizedwords, 1, stdout, options.info->chip * 100);
}
if (dump)
@@ -988,8 +810,19 @@ decode(void)
while ((line = popline())) {
printf("%s", line);
if (startswith(line, "revision:")) {
- parseline(line, "revision: %u", &options.gpu_id);
- printf("Got gpu_id=%u\n", options.gpu_id);
+ unsigned core, major, minor, patchid;
+
+ parseline(line, "revision: %u (%u.%u.%u.%u)", &options.dev_id.gpu_id,
+ &core, &major, &minor, &patchid);
+
+ options.dev_id.chip_id = (core << 24) | (major << 16) | (minor << 8) | patchid;
+ options.info = fd_dev_info_raw(&options.dev_id);
+ if (!options.info) {
+ printf("Unsupported device\n");
+ break;
+ }
+
+ printf("Got chip_id=0x%"PRIx64"\n", options.dev_id.chip_id);
cffdec_init(&options);
@@ -1009,10 +842,16 @@ decode(void)
} else {
rnn_control = NULL;
}
+ } else if (startswith(line, "fault-info:")) {
+ decode_fault_info();
} else if (startswith(line, "bos:")) {
decode_bos();
} else if (startswith(line, "ringbuffer:")) {
decode_ringbuffer();
+ } else if (startswith(line, "gmu-log:")) {
+ decode_gmu_log();
+ } else if (startswith(line, "gmu-hfi:")) {
+ decode_gmu_hfi();
} else if (startswith(line, "registers:")) {
decode_registers();