diff options
Diffstat (limited to 'src/freedreno/decode/crashdec.c')
-rw-r--r-- | src/freedreno/decode/crashdec.c | 537 |
1 files changed, 188 insertions, 349 deletions
diff --git a/src/freedreno/decode/crashdec.c b/src/freedreno/decode/crashdec.c index 5e54f7fa060..35316acf320 100644 --- a/src/freedreno/decode/crashdec.c +++ b/src/freedreno/decode/crashdec.c @@ -36,54 +36,23 @@ * or times out after 5min) */ -#include <assert.h> -#include <getopt.h> -#include <inttypes.h> -#include <stdarg.h> -#include <stdbool.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#include "freedreno_pm4.h" - -#include "ir3/instr-a3xx.h" -#include "buffers.h" -#include "cffdec.h" -#include "disasm.h" -#include "pager.h" -#include "rnnutil.h" -#include "util.h" + +#include "crashdec.h" static FILE *in; -static bool verbose; +bool verbose; + +struct rnn *rnn_gmu; +struct rnn *rnn_control; +struct rnn *rnn_pipe; -static struct rnn *rnn_gmu; -static struct rnn *rnn_control; -static struct rnn *rnn_pipe; +static uint64_t fault_iova; +static bool has_fault_iova; -static struct cffdec_options options = { +struct cffdec_options options = { .draw_filter = -1, }; -static inline bool -is_a6xx(void) -{ - return (600 <= options.gpu_id) && (options.gpu_id < 700); -} -static inline bool -is_a5xx(void) -{ - return (500 <= options.gpu_id) && (options.gpu_id < 600); -} -static inline bool -is_64b(void) -{ - return options.gpu_id >= 500; -} - /* * Helpers to read register values: */ @@ -282,10 +251,84 @@ decode_ringbuffer(void) } } +/* + * Decode GMU log + */ + +static void +decode_gmu_log(void) +{ + uint64_t iova; + uint32_t size; + + foreach_line_in_section (line) { + if (startswith(line, " iova:")) { + parseline(line, " iova: %" PRIx64, &iova); + } else if (startswith(line, " size:")) { + parseline(line, " size: %u", &size); + } else if (startswith(line, " data: !!ascii85 |")) { + void *buf = popline_ascii85(size / 4); + + dump_hex_ascii(buf, size, 1); + + free(buf); + + continue; + } + + printf("%s", line); + } +} + +/* + * Decode HFI queues + */ + +static void +decode_gmu_hfi(void) +{ + struct a6xx_hfi_state hfi = {}; + + /* Initialize the history buffers with invalid entries (-1): */ + memset(&hfi.history, 0xff, sizeof(hfi.history)); + + foreach_line_in_section (line) { + if (startswith(line, " iova:")) { + parseline(line, " iova: %" PRIx64, &hfi.iova); + } else if (startswith(line, " size:")) { + parseline(line, " size: %u", &hfi.size); + } else if (startswith(line, " queue-history")) { + unsigned qidx, dummy; + + parseline(line, " queue-history[%u]:", &qidx); + assert(qidx < ARRAY_SIZE(hfi.history)); + + parseline(line, " queue-history[%u]: %d %d %d %d %d %d %d %d", &dummy, + &hfi.history[qidx][0], &hfi.history[qidx][1], + &hfi.history[qidx][2], &hfi.history[qidx][3], + &hfi.history[qidx][4], &hfi.history[qidx][5], + &hfi.history[qidx][6], &hfi.history[qidx][7]); + } else if (startswith(line, " data: !!ascii85 |")) { + hfi.buf = popline_ascii85(hfi.size / 4); + + if (verbose) + dump_hex_ascii(hfi.buf, hfi.size, 1); + + dump_gmu_hfi(&hfi); + + free(hfi.buf); + + continue; + } + + printf("%s", line); + } +} + static bool valid_header(uint32_t pkt) { - if (options.gpu_id >= 500) { + if (options.info->chip >= 5) { return pkt_is_type4(pkt) || pkt_is_type7(pkt); } else { /* TODO maybe we can check validish looking pkt3 opc or pkt0 @@ -305,9 +348,11 @@ dump_cmdstream(void) printf("got rb_base=%" PRIx64 "\n", rb_base); options.ibs[1].base = regval64("CP_IB1_BASE"); - options.ibs[1].rem = regval("CP_IB1_REM_SIZE"); + if (is_a6xx()) + options.ibs[1].rem = regval("CP_IB1_REM_SIZE"); options.ibs[2].base = regval64("CP_IB2_BASE"); - options.ibs[2].rem = regval("CP_IB2_REM_SIZE"); + if (is_a6xx()) + options.ibs[2].rem = regval("CP_IB2_REM_SIZE"); /* Adjust remaining size to account for cmdstream slurped into ROQ * but not yet consumed by SQE @@ -319,8 +364,10 @@ dump_cmdstream(void) * by name rather than hard-coding this. */ if (is_a6xx()) { - options.ibs[1].rem += regval("CP_CSQ_IB1_STAT") >> 16; - options.ibs[2].rem += regval("CP_CSQ_IB2_STAT") >> 16; + uint32_t ib1_rem = regval("CP_ROQ_AVAIL_IB1") >> 16; + uint32_t ib2_rem = regval("CP_ROQ_AVAIL_IB2") >> 16; + options.ibs[1].rem += ib1_rem ? ib1_rem - 1 : 0; + options.ibs[2].rem += ib2_rem ? ib2_rem - 1 : 0; } printf("IB1: %" PRIx64 ", %u\n", options.ibs[1].base, options.ibs[1].rem); @@ -346,6 +393,7 @@ dump_cmdstream(void) unsigned ringszdw = ringbuffers[id].size >> 2; /* in dwords */ if (verbose) { + handle_prefetch(ringbuffers[id].buf, ringszdw); dump_commands(ringbuffers[id].buf, ringszdw, 0); return; } @@ -378,12 +426,31 @@ dump_cmdstream(void) buf[idx] = ringbuffers[id].buf[p]; } + handle_prefetch(buf, cmdszdw); dump_commands(buf, cmdszdw, 0); free(buf); } } /* + * Decode optional 'fault-info' section. We only get this section if + * the devcoredump was triggered by an iova fault: + */ + +static void +decode_fault_info(void) +{ + foreach_line_in_section (line) { + if (startswith(line, " - far:")) { + parseline(line, " - far: %" PRIx64, &fault_iova); + has_fault_iova = true; + } + + printf("%s", line); + } +} + +/* * Decode 'bos' (buffers) section: */ @@ -396,8 +463,32 @@ decode_bos(void) foreach_line_in_section (line) { if (startswith(line, " - iova:")) { parseline(line, " - iova: %" PRIx64, &iova); + continue; } else if (startswith(line, " size:")) { parseline(line, " size: %u", &size); + + /* + * This is a bit convoluted, vs just printing the lines as + * they come. But we want to have both the iova and size + * so we can print the end address of the buffer + */ + + uint64_t end = iova + size; + + printf(" - iova: 0x%016" PRIx64 "-0x%016" PRIx64, iova, end); + + if (has_fault_iova) { + if ((iova <= fault_iova) && (fault_iova < end)) { + /* Fault address was within what should be a mapped buffer!! */ + printf("\t=="); + } else if ((iova <= fault_iova) && (fault_iova < (end + size))) { + /* Fault address was near this mapped buffer */ + printf("\t>="); + } + } + printf("\n"); + printf(" size: %u (0x%x)\n", size, size); + continue; } else if (startswith(line, " data: !!ascii85 |")) { uint32_t *buf = popline_ascii85(size / 4); @@ -417,42 +508,51 @@ decode_bos(void) * Decode registers section: */ -static void -dump_register(struct rnn *rnn, uint32_t offset, uint32_t value) +void +dump_register(struct regacc *r) { - struct rnndecaddrinfo *info = rnn_reginfo(rnn, offset); + struct rnndecaddrinfo *info = rnn_reginfo(r->rnn, r->regbase); if (info && info->typeinfo) { - char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value); + char *decoded = rnndec_decodeval(r->rnn->vc, info->typeinfo, r->value); printf("%s: %s\n", info->name, decoded); } else if (info) { - printf("%s: %08x\n", info->name, value); + printf("%s: %08"PRIx64"\n", info->name, r->value); } else { - printf("<%04x>: %08x\n", offset, value); + printf("<%04x>: %08"PRIx64"\n", r->regbase, r->value); } + rnn_reginfo_free(info); } static void decode_gmu_registers(void) { + struct regacc r = regacc(rnn_gmu); + foreach_line_in_section (line) { uint32_t offset, value; parseline(line, " - { offset: %x, value: %x }", &offset, &value); - printf("\t%08x\t", value); - dump_register(rnn_gmu, offset / 4, value); + if (regacc_push(&r, offset / 4, value)) { + printf("\t%08"PRIx64"\t", r.value); + dump_register(&r); + } } } static void decode_registers(void) { + struct regacc r = regacc(NULL); + foreach_line_in_section (line) { uint32_t offset, value; parseline(line, " - { offset: %x, value: %x }", &offset, &value); reg_set(offset / 4, value); - printf("\t%08x", value); - dump_register_val(offset / 4, value, 0); + if (regacc_push(&r, offset / 4, value)) { + printf("\t%08"PRIx64, r.value); + dump_register_val(&r, 0); + } } } @@ -460,6 +560,8 @@ decode_registers(void) static void decode_clusters(void) { + struct regacc r = regacc(NULL); + foreach_line_in_section (line) { if (startswith(line, " - cluster-name:") || startswith(line, " - context:")) { @@ -470,8 +572,10 @@ decode_clusters(void) uint32_t offset, value; parseline(line, " - { offset: %x, value: %x }", &offset, &value); - printf("\t%08x", value); - dump_register_val(offset / 4, value, 0); + if (regacc_push(&r, offset / 4, value)) { + printf("\t%08"PRIx64, r.value); + dump_register_val(&r, 0); + } } } @@ -509,14 +613,18 @@ dump_control_regs(uint32_t *regs) if (!rnn_control) return; + struct regacc r = regacc(rnn_control); + /* Control regs 0x100-0x17f are a scratch space to be used by the * firmware however it wants, unlike lower regs which involve some * fixed-function units. Therefore only these registers get dumped * directly. */ for (uint32_t i = 0; i < 0x80; i++) { - printf("\t%08x\t", regs[i]); - dump_register(rnn_control, i + 0x100, regs[i]); + if (regacc_push(&r, i + 0x100, regs[i])) { + printf("\t%08"PRIx64"\t", r.value); + dump_register(&r); + } } } @@ -564,292 +672,6 @@ dump_cp_ucode_dbg(uint32_t *dbg) } static void -dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context, - bool pipe) -{ - if (pipe) { - struct rnndecaddrinfo *info = rnn_reginfo(rnn_pipe, reg); - printf("\t\twrite %s (%02x) pipe\n", info->name, reg); - - if (!strcmp(info->typeinfo->name, "void")) { - /* registers that ignore their payload */ - } else { - printf("\t\t\t"); - dump_register(rnn_pipe, reg, data); - } - } else { - printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context); - dump_register_val(reg, data, 2); - } -} - -static void -dump_mem_pool_chunk(const uint32_t *chunk) -{ - struct __attribute__((packed)) { - bool reg0_enabled : 1; - bool reg1_enabled : 1; - uint32_t data0 : 32; - uint32_t data1 : 32; - uint32_t reg0 : 18; - uint32_t reg1 : 18; - bool reg0_pipe : 1; - bool reg1_pipe : 1; - uint32_t reg0_context : 1; - uint32_t reg1_context : 1; - uint32_t padding : 22; - } fields; - - memcpy(&fields, chunk, 4 * sizeof(uint32_t)); - - if (fields.reg0_enabled) { - dump_mem_pool_reg_write(fields.reg0, fields.data0, fields.reg0_context, - fields.reg0_pipe); - } - - if (fields.reg1_enabled) { - dump_mem_pool_reg_write(fields.reg1, fields.data1, fields.reg1_context, - fields.reg1_pipe); - } -} - -static void -dump_cp_mem_pool(uint32_t *mempool) -{ - /* The mem pool is a shared pool of memory used for storing in-flight - * register writes. There are 6 different queues, one for each - * cluster. Writing to $data (or for some special registers, $addr) - * pushes data onto the appropriate queue, and each queue is pulled - * from by the appropriate cluster. The queues are thus written to - * in-order, but may be read out-of-order. - * - * The queues are conceptually divided into 128-bit "chunks", and the - * read and write pointers are in units of chunks. These chunks are - * organized internally into 8-chunk "blocks", and memory is allocated - * dynamically in terms of blocks. Each queue is represented as a - * singly-linked list of blocks, as well as 3-bit start/end chunk - * pointers that point within the first/last block. The next pointers - * are located in a separate array, rather than inline. - */ - - /* TODO: The firmware CP_MEM_POOL save/restore routines do something - * like: - * - * cread $02, [ $00 + 0 ] - * and $02, $02, 0x118 - * ... - * brne $02, 0, #label - * mov $03, 0x2000 - * mov $03, 0x1000 - * label: - * ... - * - * I think that control register 0 is the GPU version, and some - * versions have a smaller mem pool. It seems some models have a mem - * pool that's half the size, and a bunch of offsets are shifted - * accordingly. Unfortunately the kernel driver's dumping code doesn't - * seem to take this into account, even the downstream android driver, - * and we don't know which versions 0x8, 0x10, or 0x100 correspond - * to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out? - */ - bool small_mem_pool = false; - - /* The array of next pointers for each block. */ - const uint32_t *next_pointers = - small_mem_pool ? &mempool[0x800] : &mempool[0x1000]; - - /* Maximum number of blocks in the pool, also the size of the pointers - * array. - */ - const int num_blocks = small_mem_pool ? 0x30 : 0x80; - - /* Number of queues */ - const unsigned num_queues = 6; - - /* Unfortunately the per-queue state is a little more complicated than - * a simple pair of begin/end pointers. Instead of a single beginning - * block, there are *two*, with the property that either the two are - * equal or the second is the "next" of the first. Similarly there are - * two end blocks. Thus the queue either looks like this: - * - * A -> B -> ... -> C -> D - * - * Or like this, or some combination: - * - * A/B -> ... -> C/D - * - * However, there's only one beginning/end chunk offset. Now the - * question is, which of A or B is the actual start? I.e. is the chunk - * offset an offset inside A or B? It depends. I'll show a typical read - * cycle, starting here (read pointer marked with a *) with a chunk - * offset of 0: - * - * A B - * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_| - * - * Once the pointer advances far enough, the hardware decides to free - * A, after which the read-side state looks like: - * - * (free) A/B - * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - * |_|_|_|_|_|_|_|_| |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_| - * - * Then after advancing the pointer a bit more, the hardware fetches - * the "next" pointer for A and stores it in B: - * - * (free) A B - * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_| - * - * Then the read pointer advances into B, at which point we've come - * back to the first state having advanced a whole block: - * - * (free) A B - * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| - * - * - * There is a similar cycle for the write pointer. Now, the question - * is, how do we know which state we're in? We need to know this to - * know whether the pointer (*) is in A or B if they're different. It - * seems like there should be some bit somewhere describing this, but - * after lots of experimentation I've come up empty-handed. For now we - * assume that if the pointer is in the first half, then we're in - * either the first or second state and use B, and otherwise we're in - * the second or third state and use A. So far I haven't seen anything - * that violates this assumption. - */ - - struct { - uint32_t unk0; - uint32_t padding0[7]; /* Mirrors of unk0 */ - - struct { - uint32_t chunk : 3; - uint32_t first_block : 32 - 3; - } writer[6]; - uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */ - - uint32_t unk1; - uint32_t padding2[7]; /* Mirrors of unk1 */ - - uint32_t writer_second_block[6]; - uint32_t padding3[2]; - - uint32_t unk2[6]; - uint32_t padding4[2]; - - struct { - uint32_t chunk : 3; - uint32_t first_block : 32 - 3; - } reader[6]; - uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */ - - uint32_t unk3; - uint32_t padding6[7]; /* Mirrors of unk3 */ - - uint32_t reader_second_block[6]; - uint32_t padding7[2]; - - uint32_t block_count[6]; - uint32_t padding[2]; - - uint32_t unk4; - uint32_t padding9[7]; /* Mirrors of unk4 */ - } data1; - - const uint32_t *data1_ptr = - small_mem_pool ? &mempool[0xc00] : &mempool[0x1800]; - memcpy(&data1, data1_ptr, sizeof(data1)); - - /* Based on the kernel, the first dword is the mem pool size (in - * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE. - */ - const uint32_t *data2_ptr = - small_mem_pool ? &mempool[0x1000] : &mempool[0x2000]; - const int data2_size = 0x60; - - /* This seems to be the size of each queue in chunks. */ - const uint32_t *queue_sizes = &data2_ptr[0x18]; - - printf("\tdata2:\n"); - dump_hex_ascii(data2_ptr, 4 * data2_size, 1); - - /* These seem to be some kind of counter of allocated/deallocated blocks */ - if (verbose) { - printf("\tunk0: %x\n", data1.unk0); - printf("\tunk1: %x\n", data1.unk1); - printf("\tunk3: %x\n", data1.unk3); - printf("\tunk4: %x\n\n", data1.unk4); - } - - for (int queue = 0; queue < num_queues; queue++) { - const char *cluster_names[6] = {"FE", "SP_VS", "PC_VS", - "GRAS", "SP_PS", "PS"}; - printf("\tCLUSTER_%s:\n\n", cluster_names[queue]); - - if (verbose) { - printf("\t\twriter_first_block: 0x%x\n", - data1.writer[queue].first_block); - printf("\t\twriter_second_block: 0x%x\n", - data1.writer_second_block[queue]); - printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk); - printf("\t\treader_first_block: 0x%x\n", - data1.reader[queue].first_block); - printf("\t\treader_second_block: 0x%x\n", - data1.reader_second_block[queue]); - printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk); - printf("\t\tblock_count: %d\n", data1.block_count[queue]); - printf("\t\tunk2: 0x%x\n", data1.unk2[queue]); - printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]); - } - - uint32_t cur_chunk = data1.reader[queue].chunk; - uint32_t cur_block = cur_chunk > 3 ? data1.reader[queue].first_block - : data1.reader_second_block[queue]; - uint32_t last_chunk = data1.writer[queue].chunk; - uint32_t last_block = last_chunk > 3 ? data1.writer[queue].first_block - : data1.writer_second_block[queue]; - - if (verbose) - printf("\tblock %x\n", cur_block); - if (cur_block >= num_blocks) { - fprintf(stderr, "block %x too large\n", cur_block); - exit(1); - } - unsigned calculated_queue_size = 0; - while (cur_block != last_block || cur_chunk != last_chunk) { - calculated_queue_size++; - uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4]; - - dump_mem_pool_chunk(chunk_ptr); - - printf("\t%05x: %08x %08x %08x %08x\n", - 4 * (cur_block * 0x20 + cur_chunk + 4), chunk_ptr[0], - chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]); - - cur_chunk++; - if (cur_chunk == 8) { - cur_block = next_pointers[cur_block]; - if (verbose) - printf("\tblock %x\n", cur_block); - if (cur_block >= num_blocks) { - fprintf(stderr, "block %x too large\n", cur_block); - exit(1); - } - cur_chunk = 0; - } - } - if (calculated_queue_size != queue_sizes[queue]) { - printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n", - calculated_queue_size); - } - printf("\n"); - } -} - -static void decode_indexed_registers(void) { char *name = NULL; @@ -923,7 +745,7 @@ decode_shader_blocks(void) * (or parts of shaders?), so perhaps we should search * for ends of shaders and decode each? */ - try_disasm_a3xx(buf, sizedwords, 1, stdout, options.gpu_id); + try_disasm_a3xx(buf, sizedwords, 1, stdout, options.info->chip * 100); } if (dump) @@ -988,8 +810,19 @@ decode(void) while ((line = popline())) { printf("%s", line); if (startswith(line, "revision:")) { - parseline(line, "revision: %u", &options.gpu_id); - printf("Got gpu_id=%u\n", options.gpu_id); + unsigned core, major, minor, patchid; + + parseline(line, "revision: %u (%u.%u.%u.%u)", &options.dev_id.gpu_id, + &core, &major, &minor, &patchid); + + options.dev_id.chip_id = (core << 24) | (major << 16) | (minor << 8) | patchid; + options.info = fd_dev_info_raw(&options.dev_id); + if (!options.info) { + printf("Unsupported device\n"); + break; + } + + printf("Got chip_id=0x%"PRIx64"\n", options.dev_id.chip_id); cffdec_init(&options); @@ -1009,10 +842,16 @@ decode(void) } else { rnn_control = NULL; } + } else if (startswith(line, "fault-info:")) { + decode_fault_info(); } else if (startswith(line, "bos:")) { decode_bos(); } else if (startswith(line, "ringbuffer:")) { decode_ringbuffer(); + } else if (startswith(line, "gmu-log:")) { + decode_gmu_log(); + } else if (startswith(line, "gmu-hfi:")) { + decode_gmu_hfi(); } else if (startswith(line, "registers:")) { decode_registers(); |